Skip to content

Commit

Permalink
Fix date cleaning and date verification
Browse files: browse the repository at this point in the history
  • Loading branch information
sharibarboza committed Apr 16, 2019
1 parent 04762dd commit ce90853
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 14 deletions.
22 changes: 10 additions & 12 deletions py_zap/py_zap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

'''
Python scraper for fetching Broadcast and Cable TV ratings from
Python scraper for fetching Broadcast and Cable TV ratings from
tvbythenumbers.zap2it.com
MIT License
Expand Down Expand Up @@ -54,11 +54,11 @@ def __init__(self, **kwargs):
kwargs[key] = convert_float(safe_unicode(value))
self.__dict__.update(kwargs)

def __repr__(self):
def __repr__(self):
"""Format row for entry object in a ratings chart"""
s = None

try:
try:
# Set width for network column (cable has longer width)
width = 7 if hasattr(self, 'share') else 16

Expand Down Expand Up @@ -102,9 +102,10 @@ def __init__(self, **kwargs):
for attr in ["show", "network"]:
key = kwargs.get(attr)
if key is not None and not isinstance(key, list):
kwargs[attr] = [key]
kwargs[attr] = [key]
self.__dict__.update(kwargs)

self.date = convert_string(self.date)
self.date_obj = convert_date(self.date)
self.weekday = get_day(self.date_obj)
self.soup = self._get_ratings_page()
Expand Down Expand Up @@ -151,6 +152,9 @@ def get_title(self):
else:
strings = get_strings(self.soup, 'b')

if len(strings) == 0:
strings = get_strings(self.soup, 'strong')

if len(strings) >= 1 and self.category == 'cable':
return strings[0]
elif len(strings) > 0 and 'Fast' in strings[-1]:
Expand Down Expand Up @@ -229,10 +233,6 @@ def _verify_page(self):
def _get_date_in_title(self):
"""Extract the date string from the title."""
title = unescape_html(''.join(self.get_title()))

# Extract string from header by getting last 3 words
#date_string = ' '.join(self.get_title().split()[-3:])
#return convert_string(date_string)
return convert_string(title)

def _get_ratings_page(self):
Expand All @@ -242,7 +242,7 @@ def _get_ratings_page(self):
soup = get_soup(self.url)
if soup:
return soup

# Try building url again with unshortened month
self._build_url(shorten=False)
soup = get_soup(self.url)
Expand Down Expand Up @@ -304,7 +304,7 @@ def fetch_entries(self):
if exceeded_limit(self.limit, len(data)):
break

entry = row.find_all('td')
entry = row.find_all('td')
entry_dict = {}

show = entry[0].string
Expand Down Expand Up @@ -463,5 +463,3 @@ def _get_rating(self, entry):
r_info += string
rating, share = r_info.split('/')
return (rating, share.strip('*'))


4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def readme():

setup(
name='py_zap',
version='1.2.1',
version='1.2.2',
description='Python scraper for accessing ratings from tvbythenumbers.zap2it.com',
long_description=readme(),
author='sharibarboza',
Expand All @@ -22,4 +22,4 @@ def readme():
'beautifulsoup4',
'requests>=2.9.1'
]
)
)

0 comments on commit ce90853

Please sign in to comment.