Bug Fixes

adamcyber1 · May 10, 2020 · 37cee5a · 37cee5a
1 parent ce6f892
commit 37cee5a
Show file tree

Hide file tree

Showing 9 changed files with 47 additions and 59 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,4 @@ dist
 *.egg-info
 .coverage
 .~lock.example*
+.vscode
diff --git a/README.md b/README.md
@@ -59,9 +59,6 @@ Let's say you want to write a script which you'll run once a day, which will fin
 ```python
 from hockeydata import get_game_shifts, get_season_play_by_play, get_play_by_plays, list_games
 
-# get today's games ids
-game_list = list_games()
-
 # get a full year of game ids
 game_list = list_games('2018-01-01', '2019-01-01')
 

diff --git a/examples/basic.py b/examples/basic.py
@@ -1,31 +1,31 @@
-import hockeydata
-
+from hockeydata import get_game_shifts, get_season_play_by_play, get_play_by_plays, list_games
+from hockeydata.output import json, csv
 
 def main():
+    # get game ids for an explicit date range (list_games requires both a start and an end date)
+    game_list = list_games('2018-01-01', '2019-01-01')
+
+    # get a full year of game ids
+    game_list = list_games('2018-01-01', '2019-01-01')
+
+    # get play by play data for a game
+    pbp = get_play_by_plays('2018021000')
+
+
+
+    # get shift data for a game
+    shifts = get_game_shifts('2018021000')
+
+    # use the formatters to put your data in different formats (you could just use pandas' built-in functions if you want :) )
+    pbp_json = json.dumps(pbp)
+    pbp_csv = csv.dumps(pbp)
+
+    pbp_dict = json.to_dict(pbp)
 
-    #shifts = hockeydata.get_game_shifts('2018020028')
-    #res = hockeydata.get_season_play_by_play(2018)
-    #print(res)
-    # initializing dictionary
-    old_dict = {
-        "N.J": 1, "NYI": 2, "NYR": 3, "PHI": 4, "PIT": 5, "BOS": 6, "BUF": 7, "MTL": 8, "OTT": 9, "TOR": 10, "ATL": 11, "CAR": 12, "FLA": 13, "T.B": 14,
-        "WSH": 15, "CHI": 16, "DET": 17, "NSH": 18, "STL": 19, "CGY": 20, "COL": 21, "EDM": 22, "VAN": 23, "ANA": 24, "DAL": 25, "L.A": 26, "ARI": 27, "S.J": 28,
-        "CBJ": 29, "MIN": 30, "WPG": 52, "ARI": 53, "VGK": 54
-    }
-
-    new_dict = dict([(value, key) for key, value in old_dict.items()])
-
-    # Printing original dictionary
-    print ("Original dictionary is : ")
-    print(old_dict)
-
-    print()
-
-    # Printing new dictionary after swapping keys and values
-    print ("Dictionary after swapping is :  ")
-    print("keys: values")
-    for i in new_dict:
-        print(i, " :  ", new_dict[i])
+    print(pbp_dict)
+    # dump it to a file if you want
+    # pbp_json = json.dump(pbp, file_handle)
+    # pbp_csv = csv.dump(pbp, file_handle)
 
 if __name__ == '__main__':
     main()
diff --git a/hockeydata/api.py b/hockeydata/api.py
@@ -47,6 +47,7 @@ def get_season_play_by_play(season: int) -> DataFrame:
     """
     return scrape.get_season_pbp(season)
 
+
 def list_games(start: str, end: str) -> DataFrame:
     """
     Get a list of games in the specified time range
@@ -57,8 +58,6 @@ def list_games(start: str, end: str) -> DataFrame:
     """
     return scrape.get_games(start, end)
 
-def get_game_infos(*args):
-    return scrape.get_game_summaries(list(args))
-
-
 
+def get_game_infos(*args):
+    return scrape.get_game_summaries(list(args))
diff --git a/hockeydata/constants.py b/hockeydata/constants.py
@@ -478,20 +478,4 @@
     "CALVIN PETERSEN ,": "CALVIN PETERSEN",
     "CAL PETERSEN": "CALVIN PETERSEN",
     "ALEXANDER NYLANDER": "ALEX NYLANDER"
-}
-
-"""
-        player = 
-          case_when(
-            player == "SEBASTIAN.AHO" & position == "D" ~ "SEBASTIAN.AHO2",  ## D, ID 8480222
-            player == "ALEX.PICARD" & position == "L" ~ "ALEX.PICARD2",      ## L, ID 8471221
-            player == "SEAN.COLLINS" & position == "C" ~ "SEAN.COLLINS2",    ## C, ID 8474744
-            player == "COLIN.WHITE" & as.numeric(game_info_data$season) >= 20162017 ~ "COLIN.WHITE2",         ## C, ID 8478400
-            player == "ERIK.GUSTAFSSON" & as.numeric(game_info_data$season) >= 20152016 ~ "ERIK.GUSTAFSSON2", ## D, ID 8476979 (CHI player)
-            
-            player == "ANDREW.MILLER" & season == "20072008" ~ "DREW.MILLER", ## DREW.MILLER 8470778 ID
-            TRUE ~ player
-            )
-        )
-
-"""
+}
diff --git a/hockeydata/output/json.py b/hockeydata/output/json.py
@@ -6,4 +6,7 @@ def dump(plays, fobj):
     fobj.write(dumps(plays))
 
 def dumps(plays: DataFrame):
-    return plays.to_json(orient='records')
+    return plays.to_json(orient='records')
+
+def to_dict(plays: DataFrame):
+    return plays.to_dict(orient='records')
diff --git a/hockeydata/scrape/html_pbp.py b/hockeydata/scrape/html_pbp.py
@@ -55,27 +55,27 @@ def parse_html(game_id: str, raw_events: list, players: dict, teams: dict) -> Da
 
     :return: DataFrame with info
     """
-    home = safeget(teams, 'Home')
-    away = safeget(teams, 'Away')
+    home = safeget(teams, 'HOME')
+    away = safeget(teams, 'AWAY')
 
     dataframe = DataFrame(columns=HTML_COLUMNS)
 
     # each event gets converted to a series then appended to the Dataframe
-    for event2 in raw_events:
-        if not valid_event(event2):
+    for event in raw_events:
+        if not valid_event(event):
             continue
 
         # the only state required to parse an event is the players + teams
-        series = pd.Series(parse_event(event2, players, home, away), index=HTML_COLUMNS)
+        series = pd.Series(parse_event(event, players, home, away), index=HTML_COLUMNS)
 
         dataframe = dataframe.append(series, ignore_index=True)
 
 
     # post processing: this is where we add data that isn't directly parseable from the HTML, e.g. scores, classifying
     # events as fenwick/corsi, etc.
     # add some columns to our dataframe that we don't get directly from the HTML
-    dataframe['AWAY_TEAM'] = away
-    dataframe['HOME_TEAM'] = home
+    dataframe['AWAY'] = away
+    dataframe['HOME'] = home
     dataframe['GAME_ID'] = game_id
     dataframe['DATE'] = get_date(game_id)
     return dataframe

diff --git a/hockeydata/scrape/json_shifts.py b/hockeydata/scrape/json_shifts.py
@@ -65,7 +65,7 @@ def parse_shift(shift: dict) -> dict:
         shift_dict['TEAM'] = fix_team_tricode(shift['teamAbbrev'])
 
         # goal events have an eventDescription; we don't care about those.
-        if shift['eventDescription'] is not 'EVG':
+        if shift['eventDescription'] != 'EVG':
             shift_dict['START'] = common.to_seconds(shift['startTime'])
             shift_dict['END'] = common.to_seconds(shift['endTime'])
             shift_dict['DURATION'] = common.to_seconds(shift['duration'])

diff --git a/hockeydata/scrape/scrape.py b/hockeydata/scrape/scrape.py
@@ -13,6 +13,7 @@
 
 logger = logging.getLogger('LOG.scrape')
 
+
 def get_games(start: str, end: str) -> DataFrame:
     """
     Get the game ids for games that occurred in the given time range (inclusive)
@@ -86,10 +87,13 @@ def get_game_pbp(game_id: str) -> DataFrame:
 
     logger.info("Scraping Game: {}".format(game_id))
     pbp = game_html_pbp(game_id)
+    print(pbp)
     pbp = add_event_coordinates(pbp, game_id)
+    print(pbp)
 
     return pbp
 
+
 def get_games_pbp(game_ids: list) -> DataFrame:
     """
     Gets the pbp for a list of games. This function is just in charge of merging the output
@@ -127,6 +131,7 @@ def get_game_shifts(game_id: str) -> DataFrame:
 
     return shifts
 
+
 def get_games_shifts(game_ids: list) -> DataFrame:
     """
     Gets the shifts for list of games and returns a single merged dataframe
@@ -149,7 +154,6 @@ def get_games_shifts(game_ids: list) -> DataFrame:
         return None
 
 
-
 def game_html_pbp(game_id: str) -> DataFrame:
     """
     Gets the play by play data from HTMLREPORTS.