Skip to content

Commit

Permalink
add basic scraper functionality, change input_country function to ret…
Browse files Browse the repository at this point in the history
…urn alpha_3 for the scraper
  • Loading branch information
kronicka committed Dec 29, 2018
1 parent a4b77cd commit 0c0a816
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 8 deletions.
16 changes: 11 additions & 5 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,11 @@ def input_country() -> str:
country = input('Enter your country (full name or 2-letter alias):\n')
country_names = [country.name for country in countries]
country_codes = [country.alpha_2 for country in countries]
if country.capitalize() in country_names or country.upper() in country_codes:
return country

if country.capitalize() in country_names:
return countries.get(name=country.capitalize()).alpha_3
elif country.upper() in country_codes:
return countries.get(alpha_2=country.upper()).alpha_3
else:
print('Please enter a valid country name.')

Expand Down Expand Up @@ -128,8 +131,11 @@ def generate_calendar(units: int, unit_type: str = None):


if __name__ == '__main__':
dob = input_dob()
# dob = input_dob()
# sex = input_sex()
# weeks = calculate_weeks(sex, *dob)
days = calculate_days(*dob)
generate_calendar(days, 'days')
# days = calculate_days(*dob)
# generate_calendar(days, 'days')
c = input_country()
print(c)

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
bs4
Pillow>=5.3.0
python-dateutil
pycountry>=18.12.8
pycountry>=18.12.8
requests
17 changes: 15 additions & 2 deletions scraper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
# Scraper for the World Bank's most recent life expectancy stats,
# used when the prediction is based on the country
from bs4 import BeautifulSoup
import requests

soup = BeautifulSoup('<html><body><b>Hi</b></body></html>', 'html.parser')
country = 'RUS'
widget_url = 'https://databank.worldbank.org/data/views/reports/reportwidget.aspx'
report_url = f'?Report_Name=CountryProfile&Id=b450fd57&tbar=y&dd=y&inf=n&zm=n&country={country}'
data_url = widget_url + report_url

print(soup.body)
response = requests.get(data_url)

onclick_value = "loadMetaData('SP.DYN.LE00.IN','S','Series','Life expectancy at birth, total (years)','2','1801')"
soup = BeautifulSoup(response.text, 'html.parser') \
.find(attrs={'onclick': onclick_value}) \
.parent \
.find_all('td')[-1] \
.find('div').get_text()

print(soup)

0 comments on commit 0c0a816

Please sign in to comment.