forked from billpsomas/rscir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnormalize_similarities.sql
89 lines (88 loc) · 2.3 KB
/
normalize_similarities.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
-- Yeah this is poorly written but w/e
with zomg_query as (
with image_query as (
with img_mean as (
with image_query as (
select embedding from rscir_images ri where ri.filename = 'tenniscourt723.jpg'
)
select avg(
(embedding <#> (select * from image_query)) * -1
)
from rscir_images
),
img_std as (
with image_query as (
select embedding from rscir_images ri where ri.filename = 'tenniscourt723.jpg'
)
select stddev(
(embedding <#> (select * from image_query)) * -1
)
from rscir_images
),
img_similarity as (
with image_query as (
select embedding from rscir_images ri where ri.filename = 'tenniscourt723.jpg'
)
select *, (
(embedding <#> (select * from image_query)) * -1
) as img_similarity
from rscir_images ri order by img_similarity desc
)
select *,
(select * from img_mean) as img_mean,
(select * from img_std) as img_std,
0.5 * (
1 + erf(
(img_similarity - (select * from img_mean)) / (sqrt(2) * (select * from img_std))
)
) as img_sim_norm
from img_similarity
),
text_query as (
with text_mean as (
with text_query as (
select embedding from rscir_words rw where rw.word = 'red'
)
select avg(
(embedding <#> (select * from text_query)) * -1
) from rscir_images ri
),
text_std as (
with text_query as (
select embedding from rscir_words rw where rw.word = 'red'
)
select stddev(
(embedding <#> (select * from text_query)) * -1
) from rscir_images ri
),
text_similarity as (
with text_query as (
select embedding from rscir_words rw where rw.word = 'red'
)
select *, (
(embedding <#> (select * from text_query)) * -1
) as txt_sim
from rscir_images ri order by txt_sim desc
)
select *,
(select * from text_mean) as text_mean,
(select * from text_std) as text_std,
0.5 * (
1 + erf(
(txt_sim - (select * from text_mean)) / (sqrt(2) * (select * from text_std))
)
) as txt_sim_norm
from text_similarity
)
select
image_query.filename,
image_query.url,
image_query.img_sim_norm,
text_query.txt_sim_norm
from image_query join text_query on image_query.filename=text_query.filename
)
select *,
(0.4 * img_sim_norm) + (0.6 * txt_sim_norm) as weighted_sim -- lambda=0.6
from zomg_query
order by weighted_sim desc
limit 3;