-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsamebday.py
115 lines (82 loc) · 2.9 KB
/
samebday.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import argparse
from datetime import datetime
import requests
from bs4 import BeautifulSoup
URL_BASE = 'https://en.wikipedia.org'


def generate_url(date):
    '''
    given a yyyy-mm-dd date, return a tuple with two things =>
        the year (as an int)
        the correct wikipedia URL for that month + day

    raises ValueError if the date isn't three dash-separated
    integers, or if the month/day isn't a real calendar day
    (Feb 29 is accepted no matter which year is given)
    '''
    # 🎵 cut my date into pieces
    # this is my last resort 🎵
    year, month, day = [int(x) for x in date.split('-')]

    # sanity-check month/day against a *leap* year so Feb 29
    # birthdays work -- a non-leap reference year rejects them
    when = datetime(2020, month, day)

    # format in the Wikipedia Way: full month name plus the
    # un-padded day, e.g. `July_4` rather than `July_04`
    formatted_date = f'{when:%B}_{when.day}'

    # return year and URL
    return (year, f'{URL_BASE}/wiki/{formatted_date}')
def _birth_year(year_text):
    '''
    turn a listed year such as `1969` or `63 BC` into an int
    (BC years come back negative), or None if it isn't a plain year
    '''
    is_bc = 'BC' in year_text
    try:
        year = int(year_text.replace('BC', '').strip())
    except ValueError:
        return None
    return -year if is_bc else year


def parse_wiki_bday(target_year, url):
    '''
    parse birthday list on the wikipedia page

    target_year => int year to look for (negative means BC)
    url => the wikipedia day page to fetch

    returns a tuple with two lists of data =>
        list of lists of peeps w/ *exact* same bday
        list of list of peeps w/ same bday different year
    each inner list is [year text, person text, person URL];
    the URL is an empty string when the entry has no article link

    raises a requests exception if the fetch times out or the
    server answers with an error status
    '''
    # fetch the page -- with a timeout so a stalled connection
    # can't hang the script forever, and bail on a 4xx/5xx
    # instead of trying to scrape an error page
    r = requests.get(url, timeout=10)
    r.raise_for_status()

    # turn it into soup
    soup = BeautifulSoup(r.text, 'html.parser')

    # target the correct list items
    # NOTE(review): assumes the `Births` span lives in a heading
    # whose second-next sibling holds the birthday list -- confirm
    # against the current wikipedia markup
    birthdays = soup.find('span', {'id': 'Births'}) \
        .parent.next_sibling.next_sibling \
        .find_all('li')

    # make a couple of output lists
    exact_same_b = []
    other_b = []

    # loop over list items on the page
    for b in birthdays:
        # split out year born and person strings -- only on the
        # *first* en dash, because the person text can contain
        # more of them (date ranges and the like)
        year, dash, human = [x.strip() for x in b.text.partition('–')]

        # no dash at all => not a "year – person" entry, skip it
        if not dash:
            continue

        # collect the links that point at wikipedia articles;
        # footnote anchors (`#cite_note...`) and external links
        # would not make a valid URL when glued onto URL_BASE
        hrefs = [
            a['href'] for a in b.find_all('a', href=True)
            if a['href'].startswith('/wiki/')
        ]

        # turn the last one into a fully qualified URL
        # (blank when the entry has no article link)
        person_url = f'{URL_BASE}{hrefs[-1]}' if hrefs else ''

        # here the stuff is!
        data_out = [year, human, person_url]

        # does this person's birth year match the input year?
        # BC years count as negative so `63 BC` never matches 63,
        # and unparseable years just land in the "other" list
        born = _birth_year(year)
        if born is not None and born == target_year:
            exact_same_b.append(data_out)
        else:
            other_b.append(data_out)

    # return those two lists
    return (exact_same_b, other_b)
if __name__ == '__main__':
    # command line interface: a single positional date argument
    cli = argparse.ArgumentParser()
    cli.add_argument('date', help='date in yyyy-mm-dd format')
    requested_date = cli.parse_args().date

    # work out the birth year + wikipedia page, then scrape it
    birth_year, wiki_url = generate_url(requested_date)
    exact_matches, other_years = parse_wiki_bday(birth_year, wiki_url)

    # how many people did we find overall?
    total = len(exact_matches) + len(other_years)

    # summary banner
    print('')
    print(f'Found {total:,} famous birthday buds')
    print('🥓'*15)

    # one titled section per result list, each preceded by a blank line
    sections = (
        ('🎉 EXACT SAME BIRTHDAY 🎉', exact_matches),
        ('👉 same birthday different year 👈', other_years),
    )
    for title, rows in sections:
        print('')
        print(title)
        for row in rows:
            print(' - '.join(row))