-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathz-score_test.sql
112 lines (90 loc) · 2.58 KB
/
z-score_test.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
--1. Overall Z-scores for each observation
--1a. numeric values
with tab1 as (
select
avg(width) as mean_w,
stdev(width) as std_w,
avg(depth) as mean_d,
stdev(depth) as std_d,
avg(height) as mean_h,
stdev(height) as std_h,
avg(weight) as mean_we,
stdev(weight) as std_we
from products)
select ref,
width,
abs(width - tab1.mean_w) / tab1.std_w as z_score_width,
depth,
abs(depth - tab1.mean_d) / tab1.std_d as z_score_depth,
height,
abs(height - tab1.mean_h) / tab1.std_h as z_score_height,
weight,
abs(weight - tab1.mean_we) / tab1.std_we as z_score_weight
from tab1, products;
--1b. boolean values
with tab1 as (
select
avg(width) as mean_w,
stdev(width) as std_w,
avg(depth) as mean_d,
stdev(depth) as std_d,
avg(height) as mean_h,
stdev(height) as std_h,
avg(weight) as mean_we,
stdev(weight) as std_we
from products)
select ref,
width,
case when abs(width - tab1.mean_w) / tab1.std_w > 3 then 'true' else 'false' end as z_score_width,
depth,
case when abs(depth - tab1.mean_d) / tab1.std_d > 3 then 'true' else 'false' end as z_score_depth,
height,
case when abs(height - tab1.mean_h) / tab1.std_h > 3 then 'true' else 'false' end as z_score_height,
weight,
case when abs(weight - tab1.mean_we) / tab1.std_we > 3 then 'true' else 'false' end as z_score_weight
from tab1, products;
--2. Number of outliers for each dimension
with tab1 as (
select
avg(width) as mean_w,
stdev(width) as std_w,
avg(depth) as mean_d,
stdev(depth) as std_d,
avg(height) as mean_h,
stdev(height) as std_h,
avg(weight) as mean_we,
stdev(weight) as std_we
from products)
select count(ref) as count_outliers, 'width' as dim
from tab1, products
where abs(width - tab1.mean_w) / tab1.std_w > 3
union all
select count(ref), 'depth'
from tab1, products
where abs(depth - tab1.mean_d) / tab1.std_d > 3
union all
select count(ref), 'height'
from tab1, products
where abs(height - tab1.mean_h) / tab1.std_h > 3
union all
select count(ref), 'weight'
from tab1, products
where abs(weight - tab1.mean_we) / tab1.std_we > 3;
--3. Number of rows having any outlier in feature
with tab1 as (
select
avg(width) as mean_w,
stdev(width) as std_w,
avg(depth) as mean_d,
stdev(depth) as std_d,
avg(height) as mean_h,
stdev(height) as std_h,
avg(weight) as mean_we,
stdev(weight) as std_we
from products)
select count(ref) as count_all_outliers
from tab1, products
where abs(width - tab1.mean_w) / tab1.std_w > 3
or abs(depth - tab1.mean_d) / tab1.std_d > 3
or abs(height - tab1.mean_h) / tab1.std_h > 3
or abs(weight - tab1.mean_we) / tab1.std_we > 3;