-
Notifications
You must be signed in to change notification settings - Fork 0
/
Toronto SQL file.sql
162 lines (122 loc) · 6.05 KB
/
Toronto SQL file.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
-- Create Database Project
Create Database Project;
-- Use Database Project
Use Project;
-- Importing CSV of Toronto
-- Read top 10 Rows from Toronto CSV
select top 10 * from [dbo].[df_toronto_availability]
select top 10 * from [dbo].[host_toronto_df]
Select top 10 * from [dbo].[listing_toronto_df]
Select top 10 * from [dbo].[review_toronto_df]
-- Checking no of rows in each tables
Select COUNT(*)as no_of_rows from [dbo].[host_t]
--Finding Hosts Acceptance rate & response rate
Select
Avg(host_response_rate) As host_response_rate,
Avg(host_acceptance_rate) as host_acceptance_rate,
host_is_superhost from [dbo].[host_toronto_df]
where host_response_rate is not null or host_acceptance_rate is not null
group by host_is_superhost;
-- Checking Hosts having Profile picture
select host_is_superhost,sum(true) as Host_have_profile_pic,
sum(false)as Host_dont_have_profile_pic
from(
select host_is_superhost,[TRUE],[FALSE]
from [dbo].[host_toronto_df]
pivot (count(host_id) for
host_has_profile_pic in ([TRUE],[FALSE]))a)a
group by host_is_superhost
-- Checking Hosts having identity verified
select host_is_superhost,sum(true) as Verified,
sum(false)as Not_verified from(
select host_is_superhost,[TRUE],[FALSE]
from [dbo].[host_toronto_df]
pivot (count(host_id) for
host_Identity_verified in ([TRUE],[FALSE]))a)a
group by host_is_superhost
-- Checking if Customer can do instant booking
Select B.Host_is_superhost,A.instant_bookable,
Count(A.HOST_ID) AS count
From listing_toronto_df A Inner Join host_toronto_df B
ON A.HOST_ID = B.HOST_ID
Group by Host_is_superhost,instant_bookable
-- Checking Customer review scores
Select B.Host_is_superhost,
avg(A.review_scores_value) as review_scores_value
From listing_toronto_df A Inner Join host_toronto_df B
ON A.HOST_ID = B.HOST_ID
Group by Host_is_superhost
-- Calculate average no of bookings per month.
Select MONTH,host_is_superhost,
Avg(Total_Bookings)Over(Partition by Month,host_is_superhost
Order by Month) as Avg_bookings
from(
Select month(a.date) as Month,
Count(B.id) as Total_Bookings,C.host_is_superhost
from df_toronto_availability A Inner Join listing_toronto_df B ON A.listing_id = B.id
Inner join host_toronto_df C ON B.Host_ID = C.HOST_ID
where host_is_superhost is not null AND A.available = 'FALSE'
group by month(date),Year(Date),C.host_is_superhost )c
-- Analyzing comments of reviewers varies for listings of Super Hosts vs Other Hosts
Select Sum(Total_Comments) from (
Select A.comments,Count(B.Host_ID) As Total_comments
from review_toronto_df A Inner Join listing_toronto_df B ON A.listing_id = B.id
Inner join host_toronto_df C ON B.Host_ID = C.HOST_ID
where C.Host_is_superhost = 'False' and A.comments like '%Beautiful%' or A.comments like '%Fantastic%'
or A.comments like '%100%%' or A.comments like '%10/10%'
or A.comments like '%11/10%' or A.comments like '%12/10%'
or A.comments like '%5 star%' or A.comments like '%5 of 5 stars%'
or A.comments like '%Great%' or A.comments like '%Clean%'
or A.comments like '%amazing%' or A.comments like '%Wonderful%'
or A.comments like '%5star%' or A.comments like '%best%'
or A.comments like '%Definitely%' or A.comments like '%excellent%'
or A.comments like '%Awesome%' or A.comments like '%excellent%'
or A.comments like '%Decent%'or A.comments like '%Well%'
or A.comments like '%quiet%' or A.comments like '%love%'
group by A.comments)c
-- Analyzing large property types For Normal-hosts
Select A.Property_type,Count(A.Host_ID) as Total_Property_Otherhost
From listing_toronto_df A Inner Join host_toronto_df B
ON A.HOST_ID = B.HOST_ID
where B.Host_is_superhost = 'False' and property_type like '%ENTIRE%'
group by A.Property_type
-- Analyzing large property types For Superhost
Select A.Property_type,Count(A.Host_ID) as Total_Property_Superhost
From listing_toronto_df A Inner Join host_toronto_df B
ON A.HOST_ID = B.HOST_ID where B.Host_is_superhost = 'TRUE' and property_type like '%ENTIRE%'
group by A.Property_type
-- Analyzing Average price of the listings
Create procedure p1 @superhost nvarchar(10) as begin
Select * from (
Select A.listing_id,AVG(A.Price) AS AVG,
YEAR(date) AS YEAR
from df_toronto_availability A Inner Join
listing_toronto_df B ON A.listing_id = B.id
Inner join host_toronto_df C ON B.Host_ID = C.HOST_ID
WHERE C.host_is_superhost = @superhost
GROUP BY A.listing_id,YEAR(date))c
PIVOT(AVG(AVG) for Year IN ([2022],[2023])) as PVT2
end;
exec p1 'TRUE';
-- Analyzing AVAILABILITY of the listings
Select A.listing_id,count(A.available) as ava,YEAR(A.date) AS YEAR into #mmm
from df_toronto_availability as A Inner Join listing_toronto_df as B ON A.listing_id = B.id
Inner join host_toronto_df as C ON B.Host_ID = C.HOST_ID WHERE C.host_is_superhost = 'True' and A.available='True'
GROUP BY A.listing_id,YEAR(A.date)
Select A.listing_id,count(A.available) as total,YEAR(A.date) AS YEAR into #nnn
from df_toronto_availability as A Inner Join listing_toronto_df as B ON A.listing_id = B.id
Inner join host_toronto_df as C ON B.Host_ID = C.HOST_ID WHERE C.host_is_superhost = 'True'
GROUP BY A.listing_id,YEAR(A.date)
select top 10* from #mmm order by listing_id,year
select top 10* from #nnn order by listing_id,year
select A.listing_id , (A.ava)*100/B.total as per,A.year from #mmm as A inner join #nnn as B on
A.listing_id=B.listing_id where A.year=B.year
order by A.listing_id, A.year
-- For NON-LOCAL hosts
Select A.Host_id, Avg(host_response_rate) As host_response_rate, Avg(host_acceptance_rate)
as host_acceptance_rate from [dbo].[host_toronto_df] A Inner Join #c B ON B.host_id = A.host_id
where host_response_rate is not null or host_acceptance_rate is not null GROUP BY A.host_id;
-- For LOCAL hosts
Select A.Host_id, Avg(host_response_rate) As host_response_rate,Avg(host_acceptance_rate)
as host_acceptance_rate from [dbo].[host_toronto_df] B Inner Join #A A ON A.host_id = B.host_id
where host_response_rate is not null or host_acceptance_rate is not null GROUP BY A.host_id;