-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathIPL_ANALYSIS_SQL.sql
316 lines (202 loc) · 9.03 KB
/
IPL_ANALYSIS_SQL.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
/***
STEPS:
1.CREATE DATABASE AND USE IT
2.IMPORT BOTH CSV FILE
3.ANALYSING DATA
4.WORING ON DIFFERENT QUESTIONS
***/
------------------------------------------------------------------------------
--SETUP DATABASE
CREATE DATABASE IPL_DATABASE_PROJECT;
USE IPL_DATABASE_PROJECT;
------------------------------------------------------------------------------
/*******SOME DATA ANALYSIS*******/
--SEEING IMPORTED TABLES
SELECT * FROM IPL_BALL;
SELECT * FROM IPL_MATCHES;
--NUMBER OF ROWS IN TABLES
SELECT COUNT(*) AS NO_OF_ROWS
FROM IPL_MATCHES;
SELECT COUNT(*) AS NO_OF_ROWS
FROM IPL_BALL;
--NUMBER OF COLUMNS IN TABLES
SELECT COUNT(*) as NO_OF_COLUMNS FROM information_schema.columns WHERE table_name = 'IPL_MATCHES';
SELECT COUNT(*) as NO_OF_COLUMNS FROM information_schema.columns WHERE table_name = 'IPL_BALL';
------------------------------------------------------------------------------
--ANALYSIS QUESTION
--Q.1) How many players have won player of the match award at least once
SELECT COUNT(DISTINCT(player_of_match)) AS PLAYER_OF_MATCH_COUNT FROM IPL_MATCHES;
------------------------------------------------------------------------------
--Q.2) Get details of top 5 matches which were won by maximum number of runs/result_margin.
--FIRST UPDATE NULL VALUES OF THIS COLUMN
UPDATE IPL_MATCHES
SET result_margin='0'
WHERE result_margin IS NULL;
--CHECKING TOP 5 MATCHES
SELECT TOP 5 * FROM IPL_MATCHES
ORDER BY result_margin DESC
------------------------------------------------------------------------------
--Q.3) Order the rows by city in which the match was played
SELECT*FROM IPL_MATCHES
ORDER BY city;
------------------------------------------------------------------------------
--Q.4) Find venue of 10 most recently played matches
SELECT TOP 10 venue FROM IPL_MATCHES
ORDER BY date DESC;
------------------------------------------------------------------------------
--Q.5) Return a column with comment based on total_runs
SELECT *,
CASE
WHEN total_runs=0 THEN 'DOT'
WHEN total_runs=1 THEN 'SINGLE'
WHEN total_runs=2 THEN 'DOUBLE'
WHEN total_runs=2 THEN 'THREE'
WHEN total_runs=4 THEN 'FOUR'
WHEN total_runs=5 THEN 'FIVE'
WHEN total_runs=6 THEN 'SIX'
END AS RUN_COMMENT
FROM IPL_BALL;
------------------------------------------------------------------------------
--Q.6) Create table deliveries_v02 with all the columns of deliveries and an additional column ball_result containing value boundary, dot or other depending on the total_run (boundary for >= 4, dot for 0 and other for any other number)
--FIRST INSERT NEW COLUMN IN IPL_BALL DATASET
ALTER TABLE IPL_BALL
ADD ball_result_label VARCHAR(50);
--UPDATE NEW COLUMN WITH VALUES
UPDATE IPL_BALL
SET ball_result_label=
CASE total_runs
WHEN 0 THEN 'DOT'
WHEN 1 THEN 'SINGLE'
WHEN 2 THEN 'DOUBLE'
WHEN 3 THEN 'THREE'
WHEN 4 THEN 'FOUR'
WHEN 5 THEN 'FIVE'
WHEN 6 THEN 'SIX'
END
WHERE total_runs IN (0,1,2,3,4,5,6);
--MAKE NEW TABLE deliveries_v02 FROM OLD TABLE IPL_BALL
SELECT * INTO deliveries_v02
FROM IPL_BALL;
--CHECKING TABLE IS CREATED OR NOT
SELECT *FROM deliveries_v02;
------------------------------------------------------------------------------
--Q.7) Write a query to fetch the total number of boundaries and dot balls.
SELECT ball_result_label,COUNT(*) AS COUNT_
FROM deliveries_v02
WHERE ball_result_label='SIX' OR ball_result_label='DOT' OR ball_result_label='FOUR'
GROUP BY ball_result_label
------------------------------------------------------------------------------
--Q.8) What is the highest runs by which any team won a match.
SELECT MAX(result_margin) AS WINNER_RUN_MARGIN FROM IPL_MATCHES;
------------------------------------------------------------------------------
--Q.9) On an average, teams won by how many runs in ipl.
SELECT ROUND(AVG(result_margin),2) AS AVERAGE_RUN_MARGIN FROM IPL_MATCHES;
------------------------------------------------------------------------------
--Q.10) How many extra runs were conceded in ipl by SK Warne
SELECT SUM(extra_runs) AS RUNS_CONCEDED FROM IPL_BALL
WHERE bowler='SK Warne';
------------------------------------------------------------------------------
--Q.11) How many boundaries (4s) and (6s) have been hit in ipl
SELECT total_runs,COUNT(total_runs) AS COUNT_
FROM IPL_BALL
WHERE total_runs=4 OR total_runs=6
GROUP BY total_runs;
------------------------------------------------------------------------------
--Q.12) How many balls did SK Warne bowl to batsman SR Tendulkar.
SELECT COUNT(*) AS COUNT_OF_BALL
FROM IPL_BALL
WHERE batsman='SR Tendulkar' AND bowler='SK Warne';
------------------------------------------------------------------------------
--Q.11) How many matches were played in the March and June
SELECT COUNT(*) AS COUNT_OF_MATCHES FROM IPL_MATCHES
WHERE MONTH(date)=03 OR MONTH(date)=06;
------------------------------------------------------------------------------
--Q.12) Add column season in IPL_MATCHES dataset and update this column taking year of match from date column.
--ADDING COLUMN
ALTER TABLE IPL_MATCHES
ADD season int;
--UPDATE COLUMN WITH YEAR
UPDATE IPL_MATCHES
SET season =YEAR(date)
------------------------------------------------------------------------------
--Q.13) Name the players who won player of match only once in this period.
SELECT player_of_match,COUNT(player_of_match) FROM IPL_MATCHES
GROUP BY player_of_match
HAVING COUNT(player_of_match)=1
------------------------------------------------------------------------------
--Q.14) Fetch data of all the matches played on 2nd May 2013
SELECT *FROM IPL_MATCHES
WHERE date= '2013-05-02';
------------------------------------------------------------------------------
--Q.15) Fetch data of all the matches where the margin of victory is more than 100 runs
SELECT *FROM IPL_MATCHES
WHERE result_margin>100;
------------------------------------------------------------------------------
--Q.16) Fetch data of all the matches where the final scores of both teams tied and order it in descending order of the date.
SELECT *FROM IPL_MATCHES
WHERE result='tie'
ORDER BY date DESC;
------------------------------------------------------------------------------
--Q.17) Get the count of cities that have hosted an IPL match
SELECT COUNT(DISTINCT city) AS COUNT_OF_CITIES
FROM IPL_MATCHES;
------------------------------------------------------------------------------
--Q.18) Write a query to fetch the total number of dismissals by dismissal kinds
SELECT dismissal_kind,COUNT(dismissal_kind)AS DISMISSAL_COUNT
FROM DELIVERIES
WHERE dismissal_kind != 'NA'
GROUP BY dismissal_kind
ORDER BY DISMISSAL_COUNT DESC;
------------------------------------------------------------------------------
--Q.19) Write a query to get the top 5 bowlers who conceded maximum extra runs
SELECT TOP 5 bowler, COUNT(extra_runs) AS COUNT_OF_EXTRA_RUNS
FROM IPL_BALL
GROUP BY bowler
ORDER BY COUNT_OF_EXTRA_RUNS DESC;
------------------------------------------------------------------------------
--Q.20) Write a query to fetch the total runs scored for each venue and order it in the descending order of total runs scored.
SELECT M.venue ,COUNT(B.total_runs) AS TOTAL_RUNS
FROM IPL_MATCHES AS M
JOIN
IPL_BALL AS B
ON M.id=B.id
GROUP BY M.venue
ORDER BY TOTAL_RUNS DESC;
------------------------------------------------------------------------------
--Q.21) Write a query to fetch the year-wise total runs scored at Eden Gardens and order it in the descending order of total runs scored
SELECT M.venue ,YEAR(M.date) AS YEAR_OF_MATCH,COUNT(B.total_runs) AS TOTAL_RUNS
FROM IPL_MATCHES AS M
JOIN
IPL_BALL AS B
ON M.id=B.id
WHERE M.venue='Eden Gardens'
GROUP BY M.venue,YEAR(M.date)
ORDER BY TOTAL_RUNS DESC;
------------------------------------------------------------------------------
--Q.22) Create table deliveries_v03 with all columns of deliveries_v02 and an additional column for row number partition over id.
--CREATING VIEW AND ADD COLUMN AND SAME TIME USED PARTITION BY
WITH CTE AS
(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ) AS r_nums
FROM deliveries_v02
)
UPDATE CTE
SET r_num = r_nums;
--CREATING TABLE FROM OLD TABLE
SELECT * INTO deliveries_v03
FROM deliveries_v02;
------------------------------------------------------------------------------
--Q.23) Use the r_num created in deliveries_v03 to identify instances where id is repeating .
SELECT *
FROM deliveries_v03
WHERE r_num = 2
------------------------------------------------------------------------------
--Q.24) Use subqueries to fetch data of all the ball_id which are repeating.
SELECT *
FROM deliveries_v03
WHERE id in ( SELECT id
FROM deliveries_v03
WHERE r_num=2 )
ORDER BY id
------------------------------------------------------------------------------