#Web Scraping ESPN Public Data
import pandas
import pandas as pd
import requests
pd.set_option('display.max_columns', None) #so we can see all columns in a wide dataframe
import time
import numpy as np
url_2023_season = 'https://site.web.api.espn.com/apis/common/v3/sports/golf/pga/statistics/byathlete?region=us&lang=en&contentorigin=espn&isqualified=true&page=1&limit=50&sort=general.amount%3Adesc&season=2023'
url_2022_season = 'https://site.web.api.espn.com/apis/common/v3/sports/golf/pga/statistics/byathlete?region=us&lang=en&contentorigin=espn&isqualified=true&page=1&limit=50&sort=general.amount%3Adesc&season=2022'
url_2021_season = 'https://site.web.api.espn.com/apis/common/v3/sports/golf/pga/statistics/byathlete?region=us&lang=en&contentorigin=espn&isqualified=true&page=1&limit=50&sort=general.amount%3Adesc&season=2021'
r_2023 = requests.get(url=url_2023_season).json()
r_2022 = requests.get(url=url_2022_season).json()
r_2021 = requests.get(url=url_2021_season).json()
#combine into a collection of dictionaries
all_dicts = [r_2021, r_2022, r_2023]
#adding the value headers manually
table_headers = ['Earnings','Cup','Evnts','Rnds','Cuts','Top10','Wins','Score','DDIS','DACC','GIR','PUTTS','SAND','BIRDS']
print(table_headers)
['Earnings', 'Cup', 'Evnts', 'Rnds', 'Cuts', 'Top10', 'Wins', 'Score', 'DDIS', 'DACC', 'GIR', 'PUTTS', 'SAND', 'BIRDS']
#pull stats for 2023
values_2023 =[]
for athlete in r_2023['athletes']:
for category in athlete['categories']:
x = category['totals'][0:14]
values_2023.append(x)
#turn into dataframe
df_values_2023 = pd.DataFrame(values_2023, columns=table_headers)
#player info is in a different section
info_categories = r_2023['athletes'][0]['athlete'].keys()
info_2023 = []
for athlete in r_2023['athletes']:
row = []
for category in info_categories:
if category in athlete['athlete']:
row.append(athlete['athlete'][category])
else:
row.append(None)
info_2023.append(row)
#create a player info df
info_df_2023 = pd.DataFrame(info_2023, columns=info_categories)
#combine both dataframes
df_2023 = pd.concat([info_df_2023,df_values_2023], axis=1)
#repeat for seasons 2021 and 2022
values_2022 =[]
for athlete in r_2022['athletes']:
for category in athlete['categories']:
x = category['totals'][0:14]
values_2022.append(x)
df_values_2022 = pd.DataFrame(values_2022, columns=table_headers)
#2022 athletes
info_categories_2022 = r_2022['athletes'][0]['athlete'].keys()
info_2022 = []
for athlete in r_2022['athletes']:
row = []
for category in info_categories_2022:
if category in athlete['athlete']:
row.append(athlete['athlete'][category])
else:
row.append(None)
info_2022.append(row)
info_df_2022 = pd.DataFrame(info_2022, columns=info_categories)
df_2022 = pd.concat([info_df_2022,df_values_2022], axis=1)
#2021
values_2021 =[]
for athlete in r_2021['athletes']:
for category in athlete['categories']:
x = category['totals'][0:14]
values_2021.append(x)
df_values_2021 = pd.DataFrame(values_2021, columns=table_headers)
#2021 athletes
info_categories_2021 = r_2021['athletes'][0]['athlete'].keys()
info_2021 = []
for athlete in r_2021['athletes']:
row = []
for category in info_categories_2021:
if category in athlete['athlete']:
row.append(athlete['athlete'][category])
else:
row.append(None)
info_2021.append(row)
info_df_2021 = pd.DataFrame(info_2021, columns=info_categories)
df_2021 = pd.concat([info_df_2021,df_values_2021], axis=1)
#combine all three dataframes using concat
golf_df_concat = pd.concat([df_2021, df_2022, df_2023])
#reformat on earnings, we'll have to remove commas and $ signs
golf_df_concat['Earnings'] = golf_df_concat['Earnings'].str.replace(',', '').str.replace('$', '')
/Users/joeyglenn4/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.
#reformat categories from strings to numbers
golf_df_concat[['Earnings', 'Cup', 'Evnts', 'Rnds', 'Cuts', 'Top10', 'Wins']] = golf_df_concat[['Earnings', 'Cup', 'Evnts', 'Rnds', 'Cuts', 'Top10', 'Wins']].apply(pd.to_numeric)
golf_df_concat['Score'] = pd.to_numeric(golf_df_concat['Score'], errors='coerce')
golf_df_concat['DDIS'] = pd.to_numeric(golf_df_concat['DDIS'], errors='coerce')
golf_df_concat['DACC'] = pd.to_numeric(golf_df_concat['DACC'], errors='coerce')
golf_df_concat['GIR'] = pd.to_numeric(golf_df_concat['GIR'], errors='coerce')
golf_df_concat['PUTTS'] = pd.to_numeric(golf_df_concat['PUTTS'], errors='coerce')
golf_df_concat['SAND'] = pd.to_numeric(golf_df_concat['SAND'], errors='coerce')
golf_df_concat['BIRDS'] = pd.to_numeric(golf_df_concat['BIRDS'], errors='coerce')
golf_df_agg = golf_df_concat.groupby('displayName').agg({'Earnings':'sum','Cup':'sum','Evnts':'sum','Rnds':'sum','Cuts':'sum','Top10':'sum','Wins':'sum','Score':'mean','DDIS':'mean','DACC':'mean','GIR':'mean','PUTTS':'mean','SAND':'mean','BIRDS':'mean'})
#Quick Save
golf_df_agg.to_excel('golf_player_data.xlsx', index=True)
#Data check
golf_df_agg.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
displayName |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Aaron Wise |
3454521 |
-12 |
24 |
86 |
19 |
4 |
0 |
69.40 |
308.30 |
59.2 |
69.10 |
1.748 |
0.0 |
43.00 |
Abraham Ancer |
5816565 |
-10 |
27 |
98 |
24 |
9 |
1 |
69.50 |
290.40 |
71.1 |
69.30 |
1.732 |
0.0 |
50.80 |
Adam Hadwin |
4373841 |
1182 |
39 |
134 |
31 |
8 |
0 |
69.95 |
295.45 |
63.8 |
67.75 |
1.746 |
0.0 |
55.35 |
Adam Schenk |
1803965 |
584 |
18 |
61 |
12 |
1 |
0 |
70.40 |
304.50 |
53.5 |
66.00 |
1.808 |
0.0 |
50.90 |
Adam Scott |
2913198 |
-4 |
20 |
72 |
18 |
5 |
0 |
70.10 |
312.40 |
55.3 |
67.70 |
1.742 |
0.0 |
52.70 |
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
displayName |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Viktor Hovland |
13957192 |
713 |
56 |
193 |
50 |
17 |
2 |
69.566667 |
305.133333 |
63.933333 |
67.566667 |
1.735000 |
0.0 |
51.10 |
Webb Simpson |
2783012 |
1195 |
21 |
74 |
18 |
6 |
0 |
69.200000 |
292.500000 |
67.100000 |
69.400000 |
1.734000 |
0.0 |
61.70 |
Will Zalatoris |
12892434 |
3680 |
49 |
163 |
38 |
17 |
1 |
69.750000 |
311.050000 |
55.900000 |
70.200000 |
1.755500 |
0.0 |
48.55 |
Wyndham Clark |
1948049 |
535 |
15 |
56 |
13 |
4 |
0 |
69.700000 |
313.000000 |
54.000000 |
67.900000 |
1.734000 |
0.0 |
52.80 |
Xander Schauffele |
15608924 |
585 |
53 |
181 |
47 |
20 |
3 |
69.400000 |
305.300000 |
59.433333 |
69.200000 |
1.726667 |
0.0 |
61.10 |
#what are the lowest/highest average scores
golf_df_agg.describe()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
count |
9.200000e+01 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.0 |
92.000000 |
mean |
6.776147e+06 |
1015.673913 |
33.836957 |
112.554348 |
26.097826 |
8.152174 |
1.152174 |
69.535326 |
300.308514 |
60.072283 |
66.812138 |
1.734138 |
0.0 |
52.900362 |
std |
5.671968e+06 |
825.992618 |
17.431358 |
59.594853 |
14.576106 |
6.182099 |
1.459601 |
3.709235 |
17.933058 |
4.986574 |
3.976978 |
0.094267 |
0.0 |
6.790910 |
min |
1.730193e+06 |
-18.000000 |
11.000000 |
34.000000 |
7.000000 |
1.000000 |
0.000000 |
34.650000 |
147.750000 |
35.550000 |
35.250000 |
0.872500 |
0.0 |
27.100000 |
25% |
2.903400e+06 |
546.250000 |
20.000000 |
65.000000 |
14.000000 |
4.000000 |
0.000000 |
69.600000 |
296.000000 |
56.825000 |
65.875000 |
1.729917 |
0.0 |
48.900000 |
50% |
4.533484e+06 |
817.500000 |
27.500000 |
91.000000 |
20.500000 |
6.000000 |
1.000000 |
69.833333 |
302.616667 |
60.575000 |
67.025000 |
1.742000 |
0.0 |
52.400000 |
75% |
8.521836e+06 |
1311.000000 |
47.250000 |
154.500000 |
37.250000 |
10.000000 |
1.000000 |
70.137500 |
307.075000 |
63.037500 |
68.850000 |
1.757250 |
0.0 |
56.950000 |
max |
3.061599e+07 |
3680.000000 |
75.000000 |
260.000000 |
64.000000 |
30.000000 |
6.000000 |
71.900000 |
323.700000 |
71.100000 |
72.066667 |
1.808000 |
0.0 |
78.300000 |
#Minimum average score is 34.55, lowest score in PGA was shot by Furyk (58)
#find that entry
unreal_golfer = golf_df_agg[golf_df_agg['Score'] == 34.55]
print(unreal_golfer)
Empty DataFrame
Columns: [Earnings, Cup, Evnts, Rnds, Cuts, Top10, Wins, Score, DDIS, DACC, GIR, PUTTS, SAND, BIRDS]
Index: []
#Tom Kim is good... but he's not that good
unreal_golfer_2023 = df_2023[df_2023['displayName'] == 'Tom Kim']
print(unreal_golfer_2023)
id uid guid firstName \
19 4602673 s:1100~a:4602673 cce7aed3-de73-3aef-886c-688ba5f9ea88 Tom
lastName displayName shortName debutYear \
19 Kim Tom Kim T. Kim NaN
links \
19 [{'language': 'en-US', 'rel': ['playercard', '...
headshot \
19 {'href': 'https://a.espncdn.com/i/headshots/go...
status age \
19 {'id': '1', 'name': 'Active', 'type': 'active'... 20
flag Earnings Cup Evnts \
19 {'href': 'https://a.espncdn.com/i/teamlogos/co... $3,237,766 917 12
Rnds Cuts Top10 Wins Score DDIS DACC GIR PUTTS SAND BIRDS
19 42 11 3 1 69.3 295.5 71.1 70.5 1.745 0 54.2
#ESPN incorrectly entered his stats as 0 in 2021-22
unreal_golfer_2022 = df_2022[df_2022['displayName'] == 'Tom Kim']
print(unreal_golfer_2022)
id uid guid firstName \
44 4602673 s:1100~a:4602673 cce7aed3-de73-3aef-886c-688ba5f9ea88 Tom
lastName displayName shortName debutYear \
44 Kim Tom Kim T. Kim NaN
links \
44 [{'language': 'en-US', 'rel': ['playercard', '...
headshot \
44 {'href': 'https://a.espncdn.com/i/headshots/go...
status age \
44 {'id': '1', 'name': 'Active', 'type': 'active'... 20
flag Earnings Cup Evnts \
44 {'href': 'https://a.espncdn.com/i/teamlogos/co... $2,824,580 1154 11
Rnds Cuts Top10 Wins Score DDIS DACC GIR PUTTS SAND BIRDS
44 0 10 3 1 00.0 0.0 0.0 0.0 0.000 0 0.0
#Clean record, locking in 2023 stats. You could argue that this is inconsistent, but Tom Kim won't qualify for the final dataset anyways.
#The dataset will be filtered for American candidates for the Ryder Cup.
golf_df_agg.loc['Tom Kim']
Earnings 6.062346e+06
Cup 2.071000e+03
Evnts 2.300000e+01
Rnds 4.200000e+01
Cuts 2.100000e+01
Top10 6.000000e+00
Wins 2.000000e+00
Score 3.465000e+01
DDIS 1.477500e+02
DACC 3.555000e+01
GIR 3.525000e+01
PUTTS 8.725000e-01
SAND 0.000000e+00
BIRDS 2.710000e+01
Name: Tom Kim, dtype: float64
golf_df_agg.loc['Tom Kim', "Score"] = 69.1
golf_df_agg.loc['Tom Kim', "DDIS"] = 295.1
golf_df_agg.loc['Tom Kim', "DACC"] = 69.7
golf_df_agg.loc['Tom Kim', "GIR"] = 70.5
golf_df_agg.loc['Tom Kim', "PUTTS"] = 1.737
golf_df_agg.loc['Tom Kim', "SAND"] = 0
golf_df_agg.loc['Tom Kim', "BIRDS"] = 55.8
#Check Tom's updated stats
golf_df_agg.loc['Tom Kim']
Earnings 6062346.000
Cup 2071.000
Evnts 23.000
Rnds 42.000
Cuts 21.000
Top10 6.000
Wins 2.000
Score 69.100
DDIS 295.100
DACC 69.700
GIR 70.500
PUTTS 1.737
SAND 0.000
BIRDS 55.800
Name: Tom Kim, dtype: float64
#check for any outliers
golf_df_agg.describe()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
count |
9.200000e+01 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.000000 |
92.0 |
92.000000 |
mean |
6.776147e+06 |
1015.673913 |
33.836957 |
112.554348 |
26.097826 |
8.152174 |
1.152174 |
69.909783 |
301.910145 |
60.443478 |
67.195290 |
1.743534 |
0.0 |
53.212319 |
std |
5.671968e+06 |
825.992618 |
17.431358 |
59.594853 |
14.576106 |
6.182099 |
1.459601 |
0.495253 |
7.971194 |
4.374593 |
2.206917 |
0.025273 |
0.0 |
6.228607 |
min |
1.730193e+06 |
-18.000000 |
11.000000 |
34.000000 |
7.000000 |
1.000000 |
0.000000 |
68.933333 |
280.700000 |
50.800000 |
57.800000 |
1.646000 |
0.0 |
42.600000 |
25% |
2.903400e+06 |
546.250000 |
20.000000 |
65.000000 |
14.000000 |
4.000000 |
0.000000 |
69.600000 |
296.000000 |
57.050000 |
65.975000 |
1.730250 |
0.0 |
49.075000 |
50% |
4.533484e+06 |
817.500000 |
27.500000 |
91.000000 |
20.500000 |
6.000000 |
1.000000 |
69.833333 |
302.616667 |
60.600000 |
67.075000 |
1.742000 |
0.0 |
52.600000 |
75% |
8.521836e+06 |
1311.000000 |
47.250000 |
154.500000 |
37.250000 |
10.000000 |
1.000000 |
70.137500 |
307.075000 |
63.162500 |
69.016667 |
1.757250 |
0.0 |
56.950000 |
max |
3.061599e+07 |
3680.000000 |
75.000000 |
260.000000 |
64.000000 |
30.000000 |
6.000000 |
71.900000 |
323.700000 |
71.100000 |
72.066667 |
1.808000 |
0.0 |
78.300000 |
df_2023_2 = df_2023.set_index('displayName')
#create a new column for golfers who are American and currently in the top 50
American_names = ['Scottie Scheffler','Max Homa','Kurt Kitayama','Keegan Bradley','Tom Hoge','Collin Morikawa','Patrick Cantlay','Tony Finau','Sahith Theegala','Rickie Fowler','Taylor Moore','Chris Kirk','Harris English','Jordan Spieth','Brian Harman','Taylor Montgomery','Russel Henley','Justin Thomas','Keith Mitchell','Andrew Putnam','Xander Schauffele','Wyndham Clark','Adam Schenk','Sam Burns','Denny McCarthy','Brendon Todd','Hayden Buckley','Justin Suh','Brandon Wu','Davis Thompson']
golf_df_agg['is_qualified'] = False
golf_df_agg.loc[golf_df_agg.index.isin(American_names), 'is_qualified'] = True
#remove all unqualified candidates, also first cut is a pun
first_cut = golf_df_agg[golf_df_agg['is_qualified'] == True]
first_cut
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
is_qualified |
displayName |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Adam Schenk |
1803965 |
584 |
18 |
61 |
12 |
1 |
0 |
70.400000 |
304.500000 |
53.500000 |
66.000000 |
1.808000 |
0.0 |
50.900000 |
True |
Andrew Putnam |
2314939 |
679 |
17 |
57 |
13 |
3 |
0 |
69.500000 |
283.100000 |
63.000000 |
68.700000 |
1.755000 |
0.0 |
61.700000 |
True |
Brendon Todd |
1742013 |
522 |
14 |
47 |
9 |
3 |
0 |
69.900000 |
280.700000 |
66.700000 |
65.500000 |
1.717000 |
0.0 |
59.700000 |
True |
Brian Harman |
8816347 |
1899 |
69 |
234 |
54 |
13 |
0 |
69.800000 |
292.466667 |
67.166667 |
66.633333 |
1.755000 |
0.0 |
59.966667 |
True |
Chris Kirk |
3177760 |
1024 |
15 |
48 |
11 |
4 |
1 |
69.800000 |
295.800000 |
61.600000 |
66.600000 |
1.697000 |
0.0 |
63.900000 |
True |
Collin Morikawa |
15965913 |
3040 |
53 |
174 |
43 |
20 |
2 |
69.600000 |
296.533333 |
68.700000 |
69.966667 |
1.743667 |
0.0 |
52.300000 |
True |
Denny McCarthy |
4693126 |
1591 |
45 |
154 |
35 |
7 |
0 |
69.700000 |
293.300000 |
61.850000 |
65.900000 |
1.740500 |
0.0 |
55.350000 |
True |
Harris English |
9063496 |
530 |
43 |
145 |
33 |
10 |
2 |
70.000000 |
296.850000 |
62.500000 |
64.750000 |
1.742000 |
0.0 |
49.300000 |
True |
Hayden Buckley |
1836289 |
594 |
14 |
44 |
8 |
3 |
0 |
69.800000 |
304.500000 |
64.400000 |
69.400000 |
1.755000 |
0.0 |
45.600000 |
True |
Jordan Spieth |
14858548 |
612 |
58 |
198 |
47 |
19 |
2 |
69.866667 |
303.300000 |
55.133333 |
66.133333 |
1.728667 |
0.0 |
56.366667 |
True |
Justin Thomas |
15326162 |
345 |
53 |
194 |
48 |
19 |
2 |
69.600000 |
307.633333 |
55.966667 |
66.933333 |
1.729667 |
0.0 |
60.766667 |
True |
Keegan Bradley |
10811950 |
3305 |
64 |
204 |
45 |
14 |
1 |
70.133333 |
301.766667 |
62.100000 |
67.533333 |
1.756667 |
0.0 |
46.166667 |
True |
Keith Mitchell |
5093720 |
1513 |
40 |
134 |
32 |
9 |
0 |
69.700000 |
310.850000 |
63.750000 |
67.250000 |
1.750000 |
0.0 |
54.700000 |
True |
Kurt Kitayama |
5693388 |
1040 |
12 |
34 |
7 |
3 |
1 |
70.400000 |
306.400000 |
55.000000 |
66.500000 |
1.764000 |
0.0 |
57.100000 |
True |
Max Homa |
16514432 |
3053 |
63 |
208 |
50 |
16 |
5 |
69.933333 |
302.733333 |
60.600000 |
65.800000 |
1.724333 |
0.0 |
51.000000 |
True |
Patrick Cantlay |
21172810 |
784 |
53 |
176 |
45 |
23 |
6 |
69.100000 |
306.366667 |
62.433333 |
70.566667 |
1.737000 |
0.0 |
53.533333 |
True |
Rickie Fowler |
3209972 |
741 |
12 |
42 |
11 |
4 |
0 |
69.600000 |
305.500000 |
57.700000 |
69.400000 |
1.707000 |
0.0 |
44.000000 |
True |
Sahith Theegala |
6899820 |
870 |
47 |
167 |
40 |
11 |
0 |
69.950000 |
302.400000 |
54.550000 |
67.050000 |
1.733000 |
0.0 |
51.300000 |
True |
Sam Burns |
17659657 |
971 |
62 |
201 |
45 |
20 |
5 |
69.766667 |
308.033333 |
58.100000 |
66.866667 |
1.720667 |
0.0 |
50.100000 |
True |
Scottie Scheffler |
30615994 |
1821 |
65 |
222 |
55 |
27 |
6 |
69.033333 |
308.433333 |
62.600000 |
71.466667 |
1.724333 |
0.0 |
44.400000 |
True |
Taylor Montgomery |
2382435 |
783 |
16 |
56 |
14 |
4 |
0 |
69.300000 |
304.900000 |
55.000000 |
62.800000 |
1.646000 |
0.0 |
52.500000 |
True |
Taylor Moore |
2825077 |
865 |
16 |
55 |
11 |
1 |
1 |
70.400000 |
301.200000 |
57.400000 |
65.600000 |
1.790000 |
0.0 |
48.200000 |
True |
Tom Hoge |
8109202 |
741 |
48 |
146 |
30 |
10 |
1 |
69.550000 |
296.150000 |
63.150000 |
68.250000 |
1.729000 |
0.0 |
52.050000 |
True |
Tony Finau |
15456084 |
953 |
63 |
214 |
52 |
18 |
4 |
69.433333 |
304.966667 |
58.400000 |
69.533333 |
1.729667 |
0.0 |
55.066667 |
True |
Wyndham Clark |
1948049 |
535 |
15 |
56 |
13 |
4 |
0 |
69.700000 |
313.000000 |
54.000000 |
67.900000 |
1.734000 |
0.0 |
52.800000 |
True |
Xander Schauffele |
15608924 |
585 |
53 |
181 |
47 |
20 |
3 |
69.400000 |
305.300000 |
59.433333 |
69.200000 |
1.726667 |
0.0 |
61.100000 |
True |
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
count |
2.600000e+01 |
26.000000 |
26.000000 |
26.000000 |
26.000000 |
26.000000 |
26.000000 |
26.000000 |
26.000000 |
26.000000 |
26.000000 |
26.000000 |
26.0 |
26.000000 |
mean |
9.369234e+06 |
1153.076923 |
39.538462 |
132.769231 |
31.153846 |
11.000000 |
1.615385 |
69.744872 |
301.410897 |
60.182051 |
67.393590 |
1.736301 |
0.0 |
53.456410 |
std |
7.495006e+06 |
831.900808 |
21.033746 |
70.764006 |
17.469270 |
7.828154 |
2.001538 |
0.353986 |
7.747845 |
4.333838 |
1.977024 |
0.029878 |
0.0 |
5.528432 |
min |
1.742013e+06 |
345.000000 |
12.000000 |
34.000000 |
7.000000 |
1.000000 |
0.000000 |
69.033333 |
280.700000 |
53.500000 |
62.800000 |
1.646000 |
0.0 |
44.000000 |
25% |
2.913248e+06 |
598.500000 |
16.000000 |
56.000000 |
12.250000 |
4.000000 |
0.000000 |
69.562500 |
296.612500 |
56.325000 |
66.033333 |
1.724917 |
0.0 |
50.300000 |
50% |
7.504511e+06 |
824.500000 |
46.000000 |
150.000000 |
34.000000 |
10.000000 |
1.000000 |
69.733333 |
303.900000 |
61.100000 |
66.991667 |
1.733500 |
0.0 |
52.650000 |
75% |
1.542360e+07 |
1394.750000 |
56.750000 |
197.000000 |
46.500000 |
18.750000 |
2.000000 |
69.925000 |
306.150000 |
62.900000 |
69.075000 |
1.753750 |
0.0 |
56.916667 |
max |
3.061599e+07 |
3305.000000 |
69.000000 |
234.000000 |
55.000000 |
27.000000 |
6.000000 |
70.400000 |
313.000000 |
68.700000 |
71.466667 |
1.808000 |
0.0 |
63.900000 |
new_df = df_2023.set_index('displayName')
first_cut.update(new_df[['Cup']])
first_cut['Cup'] = first_cut['Cup'].astype(int)
# 6 golfers automatically qualify for the American team on points,
#if points were taken today, this is who would auto qualify
first_six = first_cut.nlargest(6,'Cup')
index_list = first_six.index.tolist()
first_six
#print(index_list)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
is_qualified |
displayName |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Scottie Scheffler |
30615994 |
1844 |
65 |
222 |
55 |
27 |
6 |
69.033333 |
308.433333 |
62.6 |
71.466667 |
1.724333 |
0.0 |
44.400000 |
True |
Max Homa |
16514432 |
1801 |
63 |
208 |
50 |
16 |
5 |
69.933333 |
302.733333 |
60.6 |
65.800000 |
1.724333 |
0.0 |
51.000000 |
True |
Keegan Bradley |
10811950 |
1153 |
64 |
204 |
45 |
14 |
1 |
70.133333 |
301.766667 |
62.1 |
67.533333 |
1.756667 |
0.0 |
46.166667 |
True |
Kurt Kitayama |
5693388 |
1040 |
12 |
34 |
7 |
3 |
1 |
70.400000 |
306.400000 |
55.0 |
66.500000 |
1.764000 |
0.0 |
57.100000 |
True |
Chris Kirk |
3177760 |
1024 |
15 |
48 |
11 |
4 |
1 |
69.800000 |
295.800000 |
61.6 |
66.600000 |
1.697000 |
0.0 |
63.900000 |
True |
Sam Burns |
17659657 |
984 |
62 |
201 |
45 |
20 |
5 |
69.766667 |
308.033333 |
58.1 |
66.866667 |
1.720667 |
0.0 |
50.100000 |
True |
# remove the top 6 from the choices, they're on the team
USA_team = []
USA_team.extend(index_list)
# select rows where displayName is not in USA_team
second_cut = first_cut.drop(index=USA_team)
second_cut
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
is_qualified |
displayName |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Adam Schenk |
1803965 |
584 |
18 |
61 |
12 |
1 |
0 |
70.400000 |
304.500000 |
53.500000 |
66.000000 |
1.808000 |
0.0 |
50.900000 |
True |
Andrew Putnam |
2314939 |
679 |
17 |
57 |
13 |
3 |
0 |
69.500000 |
283.100000 |
63.000000 |
68.700000 |
1.755000 |
0.0 |
61.700000 |
True |
Brendon Todd |
1742013 |
522 |
14 |
47 |
9 |
3 |
0 |
69.900000 |
280.700000 |
66.700000 |
65.500000 |
1.717000 |
0.0 |
59.700000 |
True |
Brian Harman |
8816347 |
739 |
69 |
234 |
54 |
13 |
0 |
69.800000 |
292.466667 |
67.166667 |
66.633333 |
1.755000 |
0.0 |
59.966667 |
True |
Collin Morikawa |
15965913 |
860 |
53 |
174 |
43 |
20 |
2 |
69.600000 |
296.533333 |
68.700000 |
69.966667 |
1.743667 |
0.0 |
52.300000 |
True |
Denny McCarthy |
4693126 |
480 |
45 |
154 |
35 |
7 |
0 |
69.700000 |
293.300000 |
61.850000 |
65.900000 |
1.740500 |
0.0 |
55.350000 |
True |
Harris English |
9063496 |
536 |
43 |
145 |
33 |
10 |
2 |
70.000000 |
296.850000 |
62.500000 |
64.750000 |
1.742000 |
0.0 |
49.300000 |
True |
Hayden Buckley |
1836289 |
594 |
14 |
44 |
8 |
3 |
0 |
69.800000 |
304.500000 |
64.400000 |
69.400000 |
1.755000 |
0.0 |
45.600000 |
True |
Jordan Spieth |
14858548 |
628 |
58 |
198 |
47 |
19 |
2 |
69.866667 |
303.300000 |
55.133333 |
66.133333 |
1.728667 |
0.0 |
56.366667 |
True |
Justin Thomas |
15326162 |
377 |
53 |
194 |
48 |
19 |
2 |
69.600000 |
307.633333 |
55.966667 |
66.933333 |
1.729667 |
0.0 |
60.766667 |
True |
Keith Mitchell |
5093720 |
450 |
40 |
134 |
32 |
9 |
0 |
69.700000 |
310.850000 |
63.750000 |
67.250000 |
1.750000 |
0.0 |
54.700000 |
True |
Patrick Cantlay |
21172810 |
821 |
53 |
176 |
45 |
23 |
6 |
69.100000 |
306.366667 |
62.433333 |
70.566667 |
1.737000 |
0.0 |
53.533333 |
True |
Rickie Fowler |
3209972 |
741 |
12 |
42 |
11 |
4 |
0 |
69.600000 |
305.500000 |
57.700000 |
69.400000 |
1.707000 |
0.0 |
44.000000 |
True |
Sahith Theegala |
6899820 |
869 |
47 |
167 |
40 |
11 |
0 |
69.950000 |
302.400000 |
54.550000 |
67.050000 |
1.733000 |
0.0 |
51.300000 |
True |
Taylor Montgomery |
2382435 |
783 |
16 |
56 |
14 |
4 |
0 |
69.300000 |
304.900000 |
55.000000 |
62.800000 |
1.646000 |
0.0 |
52.500000 |
True |
Taylor Moore |
2825077 |
865 |
16 |
55 |
11 |
1 |
1 |
70.400000 |
301.200000 |
57.400000 |
65.600000 |
1.790000 |
0.0 |
48.200000 |
True |
Tom Hoge |
8109202 |
755 |
48 |
146 |
30 |
10 |
1 |
69.550000 |
296.150000 |
63.150000 |
68.250000 |
1.729000 |
0.0 |
52.050000 |
True |
Tony Finau |
15456084 |
976 |
63 |
214 |
52 |
18 |
4 |
69.433333 |
304.966667 |
58.400000 |
69.533333 |
1.729667 |
0.0 |
55.066667 |
True |
Wyndham Clark |
1948049 |
535 |
15 |
56 |
13 |
4 |
0 |
69.700000 |
313.000000 |
54.000000 |
67.900000 |
1.734000 |
0.0 |
52.800000 |
True |
Xander Schauffele |
15608924 |
617 |
53 |
181 |
47 |
20 |
3 |
69.400000 |
305.300000 |
59.433333 |
69.200000 |
1.726667 |
0.0 |
61.100000 |
True |
#highest earners
top_earners = second_cut.nlargest(6, 'Earnings')
top_earners
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
is_qualified |
displayName |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Patrick Cantlay |
21172810 |
821 |
53 |
176 |
45 |
23 |
6 |
69.100000 |
306.366667 |
62.433333 |
70.566667 |
1.737000 |
0.0 |
53.533333 |
True |
Collin Morikawa |
15965913 |
860 |
53 |
174 |
43 |
20 |
2 |
69.600000 |
296.533333 |
68.700000 |
69.966667 |
1.743667 |
0.0 |
52.300000 |
True |
Xander Schauffele |
15608924 |
617 |
53 |
181 |
47 |
20 |
3 |
69.400000 |
305.300000 |
59.433333 |
69.200000 |
1.726667 |
0.0 |
61.100000 |
True |
Tony Finau |
15456084 |
976 |
63 |
214 |
52 |
18 |
4 |
69.433333 |
304.966667 |
58.400000 |
69.533333 |
1.729667 |
0.0 |
55.066667 |
True |
Justin Thomas |
15326162 |
377 |
53 |
194 |
48 |
19 |
2 |
69.600000 |
307.633333 |
55.966667 |
66.933333 |
1.729667 |
0.0 |
60.766667 |
True |
Jordan Spieth |
14858548 |
628 |
58 |
198 |
47 |
19 |
2 |
69.866667 |
303.300000 |
55.133333 |
66.133333 |
1.728667 |
0.0 |
56.366667 |
True |
second_cut['Earnings_per_event'] = second_cut['Earnings'] / second_cut['Evnts']
second_cut['Points_per_event'] = second_cut['Cup'] / second_cut['Evnts']
second_cut['Top10_per_event'] = second_cut['Top10'] / second_cut['Evnts']
second_cut['Wins_per_event'] = second_cut['Wins'] / second_cut['Evnts']
second_cut
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
Earnings |
Cup |
Evnts |
Rnds |
Cuts |
Top10 |
Wins |
Score |
DDIS |
DACC |
GIR |
PUTTS |
SAND |
BIRDS |
is_qualified |
Earnings_per_event |
Points_per_event |
Top10_per_event |
Wins_per_event |
displayName |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Adam Schenk |
1803965 |
584 |
18 |
61 |
12 |
1 |
0 |
70.400000 |
304.500000 |
53.500000 |
66.000000 |
1.808000 |
0.0 |
50.900000 |
True |
100220.277778 |
32.444444 |
0.055556 |
0.000000 |
Andrew Putnam |
2314939 |
679 |
17 |
57 |
13 |
3 |
0 |
69.500000 |
283.100000 |
63.000000 |
68.700000 |
1.755000 |
0.0 |
61.700000 |
True |
136172.882353 |
39.941176 |
0.176471 |
0.000000 |
Brendon Todd |
1742013 |
522 |
14 |
47 |
9 |
3 |
0 |
69.900000 |
280.700000 |
66.700000 |
65.500000 |
1.717000 |
0.0 |
59.700000 |
True |
124429.500000 |
37.285714 |
0.214286 |
0.000000 |
Brian Harman |
8816347 |
739 |
69 |
234 |
54 |
13 |
0 |
69.800000 |
292.466667 |
67.166667 |
66.633333 |
1.755000 |
0.0 |
59.966667 |
True |
127773.144928 |
10.710145 |
0.188406 |
0.000000 |
Collin Morikawa |
15965913 |
860 |
53 |
174 |
43 |
20 |
2 |
69.600000 |
296.533333 |
68.700000 |
69.966667 |
1.743667 |
0.0 |
52.300000 |
True |
301243.641509 |
16.226415 |
0.377358 |
0.037736 |
Denny McCarthy |
4693126 |
480 |
45 |
154 |
35 |
7 |
0 |
69.700000 |
293.300000 |
61.850000 |
65.900000 |
1.740500 |
0.0 |
55.350000 |
True |
104291.688889 |
10.666667 |
0.155556 |
0.000000 |
Harris English |
9063496 |
536 |
43 |
145 |
33 |
10 |
2 |
70.000000 |
296.850000 |
62.500000 |
64.750000 |
1.742000 |
0.0 |
49.300000 |
True |
210778.976744 |
12.465116 |
0.232558 |
0.046512 |
Hayden Buckley |
1836289 |
594 |
14 |
44 |
8 |
3 |
0 |
69.800000 |
304.500000 |
64.400000 |
69.400000 |
1.755000 |
0.0 |
45.600000 |
True |
131163.500000 |
42.428571 |
0.214286 |
0.000000 |
Jordan Spieth |
14858548 |
628 |
58 |
198 |
47 |
19 |
2 |
69.866667 |
303.300000 |
55.133333 |
66.133333 |
1.728667 |
0.0 |
56.366667 |
True |
256181.862069 |
10.827586 |
0.327586 |
0.034483 |
Justin Thomas |
15326162 |
377 |
53 |
194 |
48 |
19 |
2 |
69.600000 |
307.633333 |
55.966667 |
66.933333 |
1.729667 |
0.0 |
60.766667 |
True |
289172.867925 |
7.113208 |
0.358491 |
0.037736 |
Keith Mitchell |
5093720 |
450 |
40 |
134 |
32 |
9 |
0 |
69.700000 |
310.850000 |
63.750000 |
67.250000 |
1.750000 |
0.0 |
54.700000 |
True |
127343.000000 |
11.250000 |
0.225000 |
0.000000 |
Patrick Cantlay |
21172810 |
821 |
53 |
176 |
45 |
23 |
6 |
69.100000 |
306.366667 |
62.433333 |
70.566667 |
1.737000 |
0.0 |
53.533333 |
True |
399486.981132 |
15.490566 |
0.433962 |
0.113208 |
Rickie Fowler |
3209972 |
741 |
12 |
42 |
11 |
4 |
0 |
69.600000 |
305.500000 |
57.700000 |
69.400000 |
1.707000 |
0.0 |
44.000000 |
True |
267497.666667 |
61.750000 |
0.333333 |
0.000000 |
Sahith Theegala |
6899820 |
869 |
47 |
167 |
40 |
11 |
0 |
69.950000 |
302.400000 |
54.550000 |
67.050000 |
1.733000 |
0.0 |
51.300000 |
True |
146804.680851 |
18.489362 |
0.234043 |
0.000000 |
Taylor Montgomery |
2382435 |
783 |
16 |
56 |
14 |
4 |
0 |
69.300000 |
304.900000 |
55.000000 |
62.800000 |
1.646000 |
0.0 |
52.500000 |
True |
148902.187500 |
48.937500 |
0.250000 |
0.000000 |
Taylor Moore |
2825077 |
865 |
16 |
55 |
11 |
1 |
1 |
70.400000 |
301.200000 |
57.400000 |
65.600000 |
1.790000 |
0.0 |
48.200000 |
True |
176567.312500 |
54.062500 |
0.062500 |
0.062500 |
Tom Hoge |
8109202 |
755 |
48 |
146 |
30 |
10 |
1 |
69.550000 |
296.150000 |
63.150000 |
68.250000 |
1.729000 |
0.0 |
52.050000 |
True |
168941.708333 |
15.729167 |
0.208333 |
0.020833 |
Tony Finau |
15456084 |
976 |
63 |
214 |
52 |
18 |
4 |
69.433333 |
304.966667 |
58.400000 |
69.533333 |
1.729667 |
0.0 |
55.066667 |
True |
245334.666667 |
15.492063 |
0.285714 |
0.063492 |
Wyndham Clark |
1948049 |
535 |
15 |
56 |
13 |
4 |
0 |
69.700000 |
313.000000 |
54.000000 |
67.900000 |
1.734000 |
0.0 |
52.800000 |
True |
129869.933333 |
35.666667 |
0.266667 |
0.000000 |
Xander Schauffele |
15608924 |
617 |
53 |
181 |
47 |
20 |
3 |
69.400000 |
305.300000 |
59.433333 |
69.200000 |
1.726667 |
0.0 |
61.100000 |
True |
294508.000000 |
11.641509 |
0.377358 |
0.056604 |
# Great! We have our second_cut dataframe which features golfers who qualify to fill the last 6 spots
# We can see there are some consistent names at the top of results categories
second_cut.index.name = 'displayName'
names = second_cut.index
print(names)
Index(['Adam Schenk', 'Andrew Putnam', 'Brendon Todd', 'Brian Harman',
'Collin Morikawa', 'Denny McCarthy', 'Harris English', 'Hayden Buckley',
'Jordan Spieth', 'Justin Thomas', 'Keith Mitchell', 'Patrick Cantlay',
'Rickie Fowler', 'Sahith Theegala', 'Taylor Montgomery', 'Taylor Moore',
'Tom Hoge', 'Tony Finau', 'Wyndham Clark', 'Xander Schauffele'],
dtype='object', name='displayName')
#let's export this in csv and take it to tableau
second_cut.to_csv('/Users/joeyglenn4/Desktop/Case Study/second_cut.csv', index=True)