Skip to content

Commit

Permalink
Bug Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
adamcyber1 committed May 10, 2020
1 parent ce6f892 commit 37cee5a
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 59 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ dist
*.egg-info
.coverage
.~lock.example*
.vscode
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@ Let's say you want to write a script which you'll run once a day, which will fin
```python
from hockeydata import get_game_shifts, get_season_play_by_play, get_play_by_plays, list_games

# get today's games ids
game_list = list_games()

# get a full year of game ids
game_list = list_games('2018-01-01', '2019-01-01')

Expand Down
50 changes: 25 additions & 25 deletions examples/basic.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
import hockeydata

from hockeydata import get_game_shifts, get_season_play_by_play, get_play_by_plays, list_games
from hockeydata.output import json, csv

def main():
# get today's games ids
game_list = list_games('2018-01-01', '2019-01-01')

# get a full year of game ids
game_list = list_games('2018-01-01', '2019-01-01')

# get play by play data for a game
pbp = get_play_by_plays('2018021000')



# get shift data for a game
shifts = get_game_shifts('2018021000')

# use the formatters to put your data in different formats (you could just use pandas builtin functions if you want :) )
pbp_json = json.dumps(pbp)
pbp_csv = csv.dumps(pbp)

pbp_dict = json.to_dict(pbp)

#shifts = hockeydata.get_game_shifts('2018020028')
#res = hockeydata.get_season_play_by_play(2018)
#print(res)
# initializing dictionary
old_dict = {
"N.J": 1, "NYI": 2, "NYR": 3, "PHI": 4, "PIT": 5, "BOS": 6, "BUF": 7, "MTL": 8, "OTT": 9, "TOR": 10, "ATL": 11, "CAR": 12, "FLA": 13, "T.B": 14,
"WSH": 15, "CHI": 16, "DET": 17, "NSH": 18, "STL": 19, "CGY": 20, "COL": 21, "EDM": 22, "VAN": 23, "ANA": 24, "DAL": 25, "L.A": 26, "ARI": 27, "S.J": 28,
"CBJ": 29, "MIN": 30, "WPG": 52, "ARI": 53, "VGK": 54
}

new_dict = dict([(value, key) for key, value in old_dict.items()])

# Printing original dictionary
print ("Original dictionary is : ")
print(old_dict)

print()

# Printing new dictionary after swapping keys and values
print ("Dictionary after swapping is : ")
print("keys: values")
for i in new_dict:
print(i, " : ", new_dict[i])
print(pbp_dict)
# dump it to a file if you want
# pbp_json = json.dump(pbp, file_handle)
# pbp_csv = csv.dump(pbp, file_handle)

if __name__ == '__main__':
main()
7 changes: 3 additions & 4 deletions hockeydata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def get_season_play_by_play(season: int) -> DataFrame:
"""
return scrape.get_season_pbp(season)


def list_games(start: str, end: str) -> DataFrame:
"""
Get a list of games in the specified time range
Expand All @@ -57,8 +58,6 @@ def list_games(start: str, end: str) -> DataFrame:
"""
return scrape.get_games(start, end)

def get_game_infos(*args):
return scrape.get_game_summaries(list(args))



def get_game_infos(*args):
return scrape.get_game_summaries(list(args))
18 changes: 1 addition & 17 deletions hockeydata/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,20 +478,4 @@
"CALVIN PETERSEN ,": "CALVIN PETERSEN",
"CAL PETERSEN": "CALVIN PETERSEN",
"ALEXANDER NYLANDER": "ALEX NYLANDER"
}

"""
player =
case_when(
player == "SEBASTIAN.AHO" & position == "D" ~ "SEBASTIAN.AHO2", ## D, ID 8480222
player == "ALEX.PICARD" & position == "L" ~ "ALEX.PICARD2", ## L, ID 8471221
player == "SEAN.COLLINS" & position == "C" ~ "SEAN.COLLINS2", ## C, ID 8474744
player == "COLIN.WHITE" & as.numeric(game_info_data$season) >= 20162017 ~ "COLIN.WHITE2", ## C, ID 8478400
player == "ERIK.GUSTAFSSON" & as.numeric(game_info_data$season) >= 20152016 ~ "ERIK.GUSTAFSSON2", ## D, ID 8476979 (CHI player)
player == "ANDREW.MILLER" & season == "20072008" ~ "DREW.MILLER", ## DREW.MILLER 8470778 ID
TRUE ~ player
)
)
"""
}
5 changes: 4 additions & 1 deletion hockeydata/output/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ def dump(plays, fobj):
fobj.write(dumps(plays))

def dumps(plays: DataFrame):
return plays.to_json(orient='records')
return plays.to_json(orient='records')

def to_dict(plays: DataFrame):
return plays.to_dict(orient='records')
14 changes: 7 additions & 7 deletions hockeydata/scrape/html_pbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,27 +55,27 @@ def parse_html(game_id: str, raw_events: list, players: dict, teams: dict) -> Da
:return: DataFrame with info
"""
home = safeget(teams, 'Home')
away = safeget(teams, 'Away')
home = safeget(teams, 'HOME')
away = safeget(teams, 'AWAY')

dataframe = DataFrame(columns=HTML_COLUMNS)

# each event gets converted to a series then appended to the Dataframe
for event2 in raw_events:
if not valid_event(event2):
for event in raw_events:
if not valid_event(event):
continue

# the only state required to parse an event is the players + teams
series = pd.Series(parse_event(event2, players, home, away), index=HTML_COLUMNS)
series = pd.Series(parse_event(event, players, home, away), index=HTML_COLUMNS)

dataframe = dataframe.append(series, ignore_index=True)


# post-processing: this is where we add data that isn't directly parseable from the HTML, e.g. scores, classifying
# events as Fenwick/Corsi, etc.
# add some columns to our dataframe that we don't directly get from the HTML
dataframe['AWAY_TEAM'] = away
dataframe['HOME_TEAM'] = home
dataframe['AWAY'] = away
dataframe['HOME'] = home
dataframe['GAME_ID'] = game_id
dataframe['DATE'] = get_date(game_id)
return dataframe
Expand Down
2 changes: 1 addition & 1 deletion hockeydata/scrape/json_shifts.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def parse_shift(shift: dict) -> dict:
shift_dict['TEAM'] = fix_team_tricode(shift['teamAbbrev'])

# goal events have an eventDescription, we don't care about those.
if shift['eventDescription'] is not 'EVG':
if shift['eventDescription'] != 'EVG':
shift_dict['START'] = common.to_seconds(shift['startTime'])
shift_dict['END'] = common.to_seconds(shift['endTime'])
shift_dict['DURATION'] = common.to_seconds(shift['duration'])
Expand Down
6 changes: 5 additions & 1 deletion hockeydata/scrape/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

logger = logging.getLogger('LOG.scrape')


def get_games(start: str, end: str) -> DataFrame:
"""
Get the game ids for games that occurred in the given time range (inclusive)
Expand Down Expand Up @@ -86,10 +87,13 @@ def get_game_pbp(game_id: str) -> DataFrame:

logger.info("Scraping Game: {}".format(game_id))
pbp = game_html_pbp(game_id)
print(pbp)
pbp = add_event_coordinates(pbp, game_id)
print(pbp)

return pbp


def get_games_pbp(game_ids: list) -> DataFrame:
"""
Gets the pbp for a list of games. This function is just in charge of merging the output
Expand Down Expand Up @@ -127,6 +131,7 @@ def get_game_shifts(game_id: str) -> DataFrame:

return shifts


def get_games_shifts(game_ids: list) -> DataFrame:
"""
Gets the shifts for a list of games and returns a single merged dataframe
Expand All @@ -149,7 +154,6 @@ def get_games_shifts(game_ids: list) -> DataFrame:
return None



def game_html_pbp(game_id: str) -> DataFrame:
"""
Gets the play by play data from HTMLREPORTS.
Expand Down

0 comments on commit 37cee5a

Please sign in to comment.