Skip to content

Latest commit

 

History

History
1230 lines (1030 loc) · 36.3 KB

README.md

File metadata and controls

1230 lines (1030 loc) · 36.3 KB

Plot Your CitiBike Rides!

Developer: @mattwfranchi

This notebook contains code to map your CitiBike rides in the New York City metro area after you export an HTML file of your rides page.

The exact route is estimated with a shortest-path algorithm constrained to the bike network, as reported by OpenStreetMap. I provide my 2023 rides as an example. The contextily base map can either be commented out, or you can provide a free API key for Stadia Maps. Pip-install the packages imported in the cell below.

I provide the station locations CSV as well, but if you want to recompute it, simply download a month of ride data from the Citi Bike system data page.

import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

import random
import matplotlib.colors as mcolors
import calendar

import geopandas as gpd
from bs4 import BeautifulSoup

from shapely.geometry import Point, LineString

# plot
import matplotlib.pyplot as plt
import contextily as ctx
import osmnx as ox
# Function to extract bike ride details from a 'div' element
def extract_bike_ride_details(div_element):
    """Extract one ride's details from its overview-card ``div``.

    Parameters
    ----------
    div_element
        The ride overview card element. It must support ``find`` and
        ``find_next_sibling`` in the way BeautifulSoup tags do; the details
        container is expected to be a following sibling of this element.

    Returns
    -------
    dict
        Keys: 'Date', 'Price', 'Start Time', 'Duration', 'Start Location',
        'End Location' and 'Bike Type' ("Ebike" or "Normal Bike").

    Notes
    -----
    The CSS class names below are copied from the exported page; they look
    auto-generated, so presumably they change when the site is rebuilt --
    update them if nothing is found.
    """
    ride_details = {}

    # Extracting the primary details like Date, Price, Start Time, and Duration
    primary_div = div_element.find("div", class_="Components__Primary-sc-1ff1hfy-3 fiNHEp")
    ride_details['Date'] = primary_div.find("div", class_="sc-cx1xxi-0 kDzQzX").get_text(strip=True)
    detail_divs = primary_div.find_all("div", class_="Components__Detail-sc-1ff1hfy-9 dmtsts")
    ride_details['Price'] = detail_divs[0].get_text(strip=True).split(':')[-1].strip()

    # Remove the literal "Start Time:" label. str.strip('Start Time:') would
    # treat the argument as a *set of characters* and could also eat
    # characters from the time itself (e.g. a trailing lowercase "am").
    start_time_text = detail_divs[1].get_text(strip=True)
    start_time_label = 'Start Time:'
    if start_time_text.startswith(start_time_label):
        start_time_text = start_time_text[len(start_time_label):]
    ride_details['Start Time'] = start_time_text.strip()

    ride_details['Duration'] = detail_divs[2].get_text(strip=True).split(':')[-1].strip()

    # Extracting the secondary details like Start and End Locations
    secondary_div = div_element.find_next_sibling("div", {"data-testid": "DATA_TESTID_RIDE_DETAILS_CONTAINER"})
    start_end_divs = secondary_div.find_all("div", class_="sc-cx1xxi-0 lbVIIX")
    # The station name precedes the "Started at"/"Ended at" timestamp text.
    ride_details['Start Location'] = start_end_divs[0].get_text(strip=True).split("Started at")[0].strip()
    ride_details['End Location'] = start_end_divs[1].get_text(strip=True).split("Ended at")[0].strip()

    # Extracting the type of bike by searching all span elements for "Ebike ride"
    all_spans = secondary_div.find_all("span")
    ride_details['Bike Type'] = "Ebike" if any("Ebike ride" in span.get_text() for span in all_spans) else "Normal Bike"

    return ride_details

# Function to process the HTML file and extract bike ride data
def process_bike_rides(file_path):
    """Parse a saved ride-history HTML page into a DataFrame.

    Reads ``file_path`` as UTF-8, finds every ride overview card (a ``div``
    whose ``data-testid`` is ``DATA_TESTID_RIDE_OVERVIEW_CARD``) and returns a
    ``pandas.DataFrame`` with one row per card, built from the dicts produced
    by ``extract_bike_ride_details``.
    """
    with open(file_path, 'r', encoding='utf-8') as html_file:
        soup = BeautifulSoup(html_file.read(), 'html.parser')

    overview_cards = soup.find_all("div", attrs={"data-testid": "DATA_TESTID_RIDE_OVERVIEW_CARD"})

    rows = []
    for overview_card in overview_cards:
        rows.append(extract_bike_ride_details(overview_card))
    return pd.DataFrame(rows)
# Path to the saved copy of your ride-history page (HTML export)
file_path = 'data/citibike_ride_history.html'  # Replace with the path to your HTML file

# Parse the page into a DataFrame with one row per ride
df_bike_rides = process_bike_rides(file_path)
# Bare expression: displays the DataFrame when run as a notebook cell
df_bike_rides
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Date Price Start Time Duration Start Location End Location Bike Type
0 December 16, 2023 $4.81 3:28 AM 26 min 4 Ave & E 12 St Roosevelt Island Tramway Ebike
1 December 13, 2023 $2.22 1:54 AM 12 min 49 Ave & 21 St Roosevelt Island Tramway Ebike
2 December 12, 2023 $2.22 6:06 PM 12 min 45 Ave & 21 St Roosevelt Island Tramway Ebike
3 December 10, 2023 $0.00 6:55 PM 4 min 46 Ave & 5 St Center Blvd & 51 Ave Normal Bike
4 December 10, 2023 $2.04 3:03 AM 11 min 44 Dr & Jackson Ave Roosevelt Island Tramway Ebike
... ... ... ... ... ... ... ...
88 August 23, 2023 $1.30 7:54 PM 7 min 36 Ave & 10 St Roosevelt Island Tramway Ebike
89 August 23, 2023 $0.00 7:30 PM 23 min 35 St & Broadway 36 Ave & 10 St Normal Bike
90 August 23, 2023 $0.00 6:04 PM 15 min 36 Ave & 10 St 35 St & Broadway Normal Bike
91 August 23, 2023 $1.67 5:07 PM 9 min Southpoint Park 36 Ave & 10 St Ebike
92 August 23, 2023 $2.04 3:54 AM 11 min 21 St & 43 Ave Roosevelt Island Tramway Ebike

93 rows × 7 columns

# Save the parsed ride history so it can be reused without re-parsing the HTML
df_bike_rides.to_csv('data/citibike_ride_history.csv', index=False)

# ALT: GENERATE STATION ADDRESSES FROM CITI BIKE DATA
# read in citi bike system data
#df_citi = pd.read_csv('data/202311-citibike-tripdata.csv', low_memory=False)

# group by start_station name to find all unique stations
#df_citi_grouped = df_citi.groupby('start_station_name').agg('first')

#df_citi_grouped = df_citi_grouped.reset_index()
#df_citi_grouped = df_citi_grouped[['start_station_name', 'start_lat', 'start_lng']]

#df_citi_station_addresses = df_citi_grouped
#df_citi_station_addresses.columns = ['Station Name', 'Latitude', 'Longitude']

# USING PREGENERATED STATION ADDRESSES
df_citi_station_addresses = pd.read_csv('data/citibike_station_addresses.csv')

# The geometry is built from the Longitude (x) and Latitude (y) columns and is
# labelled EPSG:2263, i.e. the values are used as already-projected coordinates.
# if regenerating manually, set CRS here to EPSG:4326, and then project to EPSG:2263
df_citi_station_addresses = gpd.GeoDataFrame(
    df_citi_station_addresses,
    geometry=gpd.points_from_xy(
        x=df_citi_station_addresses['Longitude'],
        y=df_citi_station_addresses['Latitude'],
    ),
    crs='EPSG:2263',
)

# Keep the Latitude/Longitude columns in step with the geometry column
# (this matters after a manual re-projection)
df_citi_station_addresses['Latitude'] = df_citi_station_addresses.geometry.y
df_citi_station_addresses['Longitude'] = df_citi_station_addresses.geometry.x
df_citi_station_addresses
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Station Name Latitude Longitude geometry
0 1 Ave & E 110 St 227939.737227 1.001363e+06 POINT (1001363.231 227939.737)
1 1 Ave & E 16 St 206023.882314 9.893094e+05 POINT (989309.360 206023.882)
2 1 Ave & E 18 St 206624.955954 9.896419e+05 POINT (989641.924 206624.956)
3 1 Ave & E 30 St 209450.223705 9.910897e+05 POINT (991089.694 209450.224)
4 1 Ave & E 39 St 211481.435991 9.922494e+05 POINT (992249.373 211481.436)
... ... ... ... ...
2079 Wyckoff Ave & Stanhope St 195607.812124 1.007048e+06 POINT (1007048.187 195607.812)
2080 Wyckoff St & 3 Ave 188051.519400 9.890613e+05 POINT (989061.334 188051.519)
2081 Wythe Ave & Metropolitan Ave 200524.563747 9.945173e+05 POINT (994517.284 200524.564)
2082 Wythe Ave & N 13 St 202604.005990 9.961669e+05 POINT (996166.908 202604.006)
2083 Yankee Ferry Terminal 189593.613341 9.796030e+05 POINT (979602.991 189593.613)

2084 rows × 4 columns

# now, merge my rides with the citi bike station data
# Two left merges attach station info to each ride: first by start station,
# then by end station. The first merge adds un-suffixed station columns; in the
# second merge those names collide with the incoming station columns, so the
# existing (start) columns get '_start' and the new (end) columns get '_end'.
# The order of the two merges therefore matters.
# Rides whose station name has no exact match keep NaN in the station columns.
df_bike_rides = df_bike_rides.merge(df_citi_station_addresses, how='left', left_on='Start Location', right_on='Station Name')
df_bike_rides = df_bike_rides.merge(df_citi_station_addresses, how='left', left_on='End Location', right_on='Station Name', suffixes=('_start', '_end'))
df_bike_rides
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Date Price Start Time Duration Start Location End Location Bike Type Station Name_start Latitude_start Longitude_start geometry_start Station Name_end Latitude_end Longitude_end geometry_end
0 December 16, 2023 $4.81 3:28 AM 26 min 4 Ave & E 12 St Roosevelt Island Tramway Ebike 4 Ave & E 12 St 206199.988145 9.869909e+05 POINT (986990.904 206199.988) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215)
1 December 13, 2023 $2.22 1:54 AM 12 min 49 Ave & 21 St Roosevelt Island Tramway Ebike 49 Ave & 21 St 209823.829244 9.983927e+05 POINT (998392.707 209823.829) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215)
2 December 12, 2023 $2.22 6:06 PM 12 min 45 Ave & 21 St Roosevelt Island Tramway Ebike 45 Ave & 21 St 211568.520867 9.987212e+05 POINT (998721.205 211568.521) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215)
3 December 10, 2023 $0.00 6:55 PM 4 min 46 Ave & 5 St Center Blvd & 51 Ave Normal Bike 46 Ave & 5 St 211545.339225 9.968545e+05 POINT (996854.495 211545.339) Center Blvd & 51 Ave 210082.440557 9.954471e+05 POINT (995447.100 210082.441)
4 December 10, 2023 $2.04 3:03 AM 11 min 44 Dr & Jackson Ave Roosevelt Island Tramway Ebike 44 Dr & Jackson Ave 211500.654971 9.999708e+05 POINT (999970.755 211500.655) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215)
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88 August 23, 2023 $1.30 7:54 PM 7 min 36 Ave & 10 St Roosevelt Island Tramway Ebike 36 Ave & 10 St 216694.693029 1.000559e+06 POINT (1000559.392 216694.693) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215)
89 August 23, 2023 $0.00 7:30 PM 23 min 35 St & Broadway 36 Ave & 10 St Normal Bike 35 St & Broadway 216281.048071 1.005755e+06 POINT (1005755.045 216281.048) 36 Ave & 10 St 216694.693029 1.000559e+06 POINT (1000559.392 216694.693)
90 August 23, 2023 $0.00 6:04 PM 15 min 36 Ave & 10 St 35 St & Broadway Normal Bike 36 Ave & 10 St 216694.693029 1.000559e+06 POINT (1000559.392 216694.693) 35 St & Broadway 216281.048071 1.005755e+06 POINT (1005755.045 216281.048)
91 August 23, 2023 $1.67 5:07 PM 9 min Southpoint Park 36 Ave & 10 St Ebike Southpoint Park 213849.768503 9.956525e+05 POINT (995652.508 213849.769) 36 Ave & 10 St 216694.693029 1.000559e+06 POINT (1000559.392 216694.693)
92 August 23, 2023 $2.04 3:54 AM 11 min 21 St & 43 Ave Roosevelt Island Tramway Ebike 21 St & 43 Ave 212716.179791 9.992669e+05 POINT (999266.925 212716.180) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215)

93 rows × 15 columns

# Any NA values? Count missing entries per column; after the left merges,
# NaN in the station columns marks rides whose start/end station name had no
# exact match in the station table.
df_bike_rides.isna().sum()
Date                  0
Price                 0
Start Time            0
Duration              0
Start Location        0
End Location          0
Bike Type             0
Station Name_start    3
Latitude_start        3
Longitude_start       3
geometry_start        3
Station Name_end      3
Latitude_end          3
Longitude_end         3
geometry_end          3
dtype: int64
# Print the rides whose start station (first table) or end station (second
# table) found no match in the station data
for station_col in ('Station Name_start', 'Station Name_end'):
    unmatched_rides = df_bike_rides[df_bike_rides[station_col].isna()]
    print(unmatched_rides.to_string() + '\n')
                  Date   Price Start Time     Duration     Start Location          End Location    Bike Type Station Name_start  Latitude_start  Longitude_start geometry_start      Station Name_end   Latitude_end  Longitude_end                   geometry_end
57  September 21, 2023   $0.00    2:40 AM       10 min  E 9 St & Avenue C       E 11 St & 1 Ave  Normal Bike                NaN             NaN              NaN           None       E 11 St & 1 Ave  205067.655684  988610.398208  POINT (988610.398 205067.656)
79     August 27, 2023  $11.48    1:55 AM  1 hr, 2 min  E 2 St & Avenue B       Southpoint Park        Ebike                NaN             NaN              NaN           None       Southpoint Park  213849.768503  995652.507953  POINT (995652.508 213849.769)
80     August 27, 2023   $8.70    1:55 AM       47 min  E 2 St & Avenue B  Broadway & Moylan Pl        Ebike                NaN             NaN              NaN           None  Broadway & Moylan Pl  235975.215153  995587.071777  POINT (995587.072 235975.215)

                  Date  Price Start Time Duration       Start Location       End Location    Bike Type   Station Name_start  Latitude_start  Longitude_start                 geometry_start Station Name_end  Latitude_end  Longitude_end geometry_end
50  September 29, 2023  $0.00    9:22 PM    4 min  FDR Drive & E 35 St    2 Ave & E 31 St  Normal Bike  FDR Drive & E 35 St    210320.19063    992170.673673  POINT (992170.674 210320.191)              NaN           NaN            NaN         None
68   September 9, 2023  $0.00   12:59 AM    4 min       E 7 St & Ave C  E 7 St & Avenue A  Normal Bike       E 7 St & Ave C    203097.31377    990046.923666  POINT (990046.924 203097.314)              NaN           NaN            NaN         None
69   September 9, 2023  $0.93   12:58 AM    5 min       E 7 St & Ave C  E 7 St & Avenue A        Ebike       E 7 St & Ave C    203097.31377    990046.923666  POINT (990046.924 203097.314)              NaN           NaN            NaN         None
# upon inspection, the station names are slightly different ("Ave" vs "Avenue").
# Not sure if this happens at other stations I hadn't been to this year,
# so update this mapper if you have other stations from the previous cell with different names.
# Keys are the names as spelled in the station table; values are the names as
# spelled in the ride history export.
station_name_map = {
    "E 9 St & Ave C": "E 9 St & Avenue C",
    "E 2 St & Ave B": "E 2 St & Avenue B",
}


# Apply the map to the station table and redo the merges. Replacing values in
# the merged 'Station Name_start'/'Station Name_end' columns has no effect:
# for the unmatched rides those columns are NaN, and their coordinates would
# still be missing anyway.
stations_renamed = df_citi_station_addresses.copy()
stations_renamed['Station Name'] = stations_renamed['Station Name'].replace(station_name_map)

# Keep only the original ride columns (drop the '_start'/'_end' columns added
# by the earlier merges) so the merges below start from a clean frame.
ride_columns = [col for col in df_bike_rides.columns if not col.endswith(('_start', '_end'))]
df_bike_rides = df_bike_rides[ride_columns]
df_bike_rides = df_bike_rides.merge(stations_renamed, how='left', left_on='Start Location', right_on='Station Name')
df_bike_rides = df_bike_rides.merge(stations_renamed, how='left', left_on='End Location', right_on='Station Name', suffixes=('_start', '_end'))
# Re-check: print the rides that still have no matching start station (first
# table) or end station (second table)
for station_col in ('Station Name_start', 'Station Name_end'):
    still_unmatched = df_bike_rides[df_bike_rides[station_col].isna()]
    print(still_unmatched.to_string() + '\n')

# Any ride printed here still lacks station coordinates; add its station name
# to station_name_map, otherwise the ride is dropped in the next step.
                  Date   Price Start Time     Duration     Start Location          End Location    Bike Type Station Name_start  Latitude_start  Longitude_start geometry_start      Station Name_end   Latitude_end  Longitude_end                   geometry_end
57  September 21, 2023   $0.00    2:40 AM       10 min  E 9 St & Avenue C       E 11 St & 1 Ave  Normal Bike                NaN             NaN              NaN           None       E 11 St & 1 Ave  205067.655684  988610.398208  POINT (988610.398 205067.656)
79     August 27, 2023  $11.48    1:55 AM  1 hr, 2 min  E 2 St & Avenue B       Southpoint Park        Ebike                NaN             NaN              NaN           None       Southpoint Park  213849.768503  995652.507953  POINT (995652.508 213849.769)
80     August 27, 2023   $8.70    1:55 AM       47 min  E 2 St & Avenue B  Broadway & Moylan Pl        Ebike                NaN             NaN              NaN           None  Broadway & Moylan Pl  235975.215153  995587.071777  POINT (995587.072 235975.215)

                  Date  Price Start Time Duration       Start Location       End Location    Bike Type   Station Name_start  Latitude_start  Longitude_start                 geometry_start Station Name_end  Latitude_end  Longitude_end geometry_end
50  September 29, 2023  $0.00    9:22 PM    4 min  FDR Drive & E 35 St    2 Ave & E 31 St  Normal Bike  FDR Drive & E 35 St    210320.19063    992170.673673  POINT (992170.674 210320.191)              NaN           NaN            NaN         None
68   September 9, 2023  $0.00   12:59 AM    4 min       E 7 St & Ave C  E 7 St & Avenue A  Normal Bike       E 7 St & Ave C    203097.31377    990046.923666  POINT (990046.924 203097.314)              NaN           NaN            NaN         None
69   September 9, 2023  $0.93   12:58 AM    5 min       E 7 St & Ave C  E 7 St & Avenue A        Ebike       E 7 St & Ave C    203097.31377    990046.923666  POINT (990046.924 203097.314)              NaN           NaN            NaN         None
# Drop NA rows: dropna() with no arguments removes every ride that has a
# missing value in any column, i.e. rides whose start or end station could not
# be matched to coordinates. These rides are excluded from the distance totals
# and from the map.
df_bike_rides = df_bike_rides.dropna()
# Bike network graph for New York City, fetched by place name via OSMnx
nyc = ox.graph_from_place('New York City, New York, USA', network_type='bike')
nyc = ox.distance.add_edge_lengths(nyc)

# Edge GeoDataFrame of the network, declared as EPSG:4326 and then
# re-projected to EPSG:2263 (the CRS used for the station coordinates)
nyc_gdf = ox.graph_to_gdfs(nyc, nodes=False, edges=True)
nyc_gdf = nyc_gdf.set_crs(epsg=4326).to_crs(epsg=2263)

# Project the graph itself to the same CRS so node lookups by coordinate work
nyc = ox.project_graph(nyc, to_crs=2263)

# Build shapely points for each ride's start and end station; these are used
# to compute the shortest path with osmnx between each starting and ending point
for endpoint in ('start', 'end'):
    df_bike_rides[f'{endpoint}_point'] = df_bike_rides.apply(
        lambda ride, endpoint=endpoint: Point(ride[f'Longitude_{endpoint}'], ride[f'Latitude_{endpoint}']),
        axis=1,
    )
df_bike_rides
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Date Price Start Time Duration Start Location End Location Bike Type Station Name_start Latitude_start Longitude_start geometry_start Station Name_end Latitude_end Longitude_end geometry_end start_point end_point
0 December 16, 2023 $4.81 3:28 AM 26 min 4 Ave & E 12 St Roosevelt Island Tramway Ebike 4 Ave & E 12 St 206199.988145 9.869909e+05 POINT (986990.904 206199.988) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215) POINT (986990.9039654374 206199.988145324) POINT (997110.5831078328 215173.2145505548)
1 December 13, 2023 $2.22 1:54 AM 12 min 49 Ave & 21 St Roosevelt Island Tramway Ebike 49 Ave & 21 St 209823.829244 9.983927e+05 POINT (998392.707 209823.829) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215) POINT (998392.7073301204 209823.82924439415) POINT (997110.5831078328 215173.2145505548)
2 December 12, 2023 $2.22 6:06 PM 12 min 45 Ave & 21 St Roosevelt Island Tramway Ebike 45 Ave & 21 St 211568.520867 9.987212e+05 POINT (998721.205 211568.521) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215) POINT (998721.20481705 211568.5208667933) POINT (997110.5831078328 215173.2145505548)
3 December 10, 2023 $0.00 6:55 PM 4 min 46 Ave & 5 St Center Blvd & 51 Ave Normal Bike 46 Ave & 5 St 211545.339225 9.968545e+05 POINT (996854.495 211545.339) Center Blvd & 51 Ave 210082.440557 9.954471e+05 POINT (995447.100 210082.441) POINT (996854.494964951 211545.33922499855) POINT (995447.099902163 210082.44055688815)
4 December 10, 2023 $2.04 3:03 AM 11 min 44 Dr & Jackson Ave Roosevelt Island Tramway Ebike 44 Dr & Jackson Ave 211500.654971 9.999708e+05 POINT (999970.755 211500.655) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215) POINT (999970.7546327892 211500.6549706817) POINT (997110.5831078328 215173.2145505548)
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88 August 23, 2023 $1.30 7:54 PM 7 min 36 Ave & 10 St Roosevelt Island Tramway Ebike 36 Ave & 10 St 216694.693029 1.000559e+06 POINT (1000559.392 216694.693) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215) POINT (1000559.392218714 216694.69302944464) POINT (997110.5831078328 215173.2145505548)
89 August 23, 2023 $0.00 7:30 PM 23 min 35 St & Broadway 36 Ave & 10 St Normal Bike 35 St & Broadway 216281.048071 1.005755e+06 POINT (1005755.045 216281.048) 36 Ave & 10 St 216694.693029 1.000559e+06 POINT (1000559.392 216694.693) POINT (1005755.0449738645 216281.0480711439) POINT (1000559.392218714 216694.69302944464)
90 August 23, 2023 $0.00 6:04 PM 15 min 36 Ave & 10 St 35 St & Broadway Normal Bike 36 Ave & 10 St 216694.693029 1.000559e+06 POINT (1000559.392 216694.693) 35 St & Broadway 216281.048071 1.005755e+06 POINT (1005755.045 216281.048) POINT (1000559.392218714 216694.69302944464) POINT (1005755.0449738645 216281.0480711439)
91 August 23, 2023 $1.67 5:07 PM 9 min Southpoint Park 36 Ave & 10 St Ebike Southpoint Park 213849.768503 9.956525e+05 POINT (995652.508 213849.769) 36 Ave & 10 St 216694.693029 1.000559e+06 POINT (1000559.392 216694.693) POINT (995652.5079534876 213849.7685027907) POINT (1000559.392218714 216694.69302944464)
92 August 23, 2023 $2.04 3:54 AM 11 min 21 St & 43 Ave Roosevelt Island Tramway Ebike 21 St & 43 Ave 212716.179791 9.992669e+05 POINT (999266.925 212716.180) Roosevelt Island Tramway 215173.214551 9.971106e+05 POINT (997110.583 215173.215) POINT (999266.9249979228 212716.17979061004) POINT (997110.5831078328 215173.2145505548)

87 rows × 17 columns

# get nearest node to each start and end point
# nearest_nodes accepts list-like X/Y, so all rides are resolved in one call
# per endpoint instead of repeating the node search once per row.
df_bike_rides['start_node'] = ox.distance.nearest_nodes(
    nyc,
    [point.x for point in df_bike_rides['start_point']],
    [point.y for point in df_bike_rides['start_point']],
    return_dist=False,
)
df_bike_rides['end_node'] = ox.distance.nearest_nodes(
    nyc,
    [point.x for point in df_bike_rides['end_point']],
    [point.y for point in df_bike_rides['end_point']],
    return_dist=False,
)
# shortest path with osmnx: one list of node ids per ride
df_bike_rides['shortest_path'] = df_bike_rides.apply(lambda row: ox.shortest_path(nyc, row['start_node'], row['end_node']), axis=1)
# then, for each shortest path, get the edge lengths and sum them
# make edges out of each pair of nodes in shortest path
def get_edge_lengths(shortest_path, graph=None):
    """Return the length of every edge along a route.

    Parameters
    ----------
    shortest_path : list or None
        Ordered node ids of the route. ``None`` (no route) or a route with
        fewer than two nodes yields an empty list.
    graph : mapping, optional
        Graph indexed as ``graph[u][v][key]['length']``. Defaults to the
        module-level ``nyc`` graph.

    Returns
    -------
    list
        One length per consecutive node pair. When several parallel edges join
        the same two nodes, the shortest one is used (rather than always the
        edge with key 0), since that is the edge a length-weighted shortest
        path would take.
    """
    if graph is None:
        graph = nyc
    if shortest_path is None:
        return []
    return [
        min(edge_attrs['length'] for edge_attrs in graph[u][v].values())
        for u, v in zip(shortest_path, shortest_path[1:])
    ]

# Per-ride list of edge lengths along the shortest path
df_bike_rides['edge_lengths'] = df_bike_rides['shortest_path'].apply(get_edge_lengths)

# Total route length per ride: sum of its edge lengths
df_bike_rides['distance'] = df_bike_rides['edge_lengths'].apply(lambda lengths: sum(lengths))
# convert distance from meters to miles
df_bike_rides['distance'] *= 0.000621371


# Summary statistics of the per-ride distance
df_bike_rides['distance'].describe()
count    87.000000
mean      2.877928
std       2.371434
min       0.000000
25%       1.000427
50%       2.098547
75%       4.602647
max      10.226281
Name: distance, dtype: float64
# How many total miles biked (approximate)? This sums the per-ride
# shortest-path distances, so it estimates the distance actually ridden.
df_bike_rides['distance'].sum()

# pretty close! citi bike app reports 255 miles biked
250.37970150434506
# make geopandas dataframe with a line geometry per ride.
# NOTE: the geometry is a straight line from the start station to the end
# station, not the routed shortest path.
df_bike_rides['geometry'] = df_bike_rides.apply(
    lambda ride: LineString([
        (ride['Longitude_start'], ride['Latitude_start']),
        (ride['Longitude_end'], ride['Latitude_end']),
    ]),
    axis=1,
)
#df_bike_rides['geometry'] = df_bike_rides.apply(lambda x: ox.make_folium_polyline(nyc, x['shortest_path'], edge_color='red', edge_width=2, edge_opacity=1), axis=1)
# The station coordinates are in EPSG:2263, so declare that CRS on the result
df_bike_rides_gdf = gpd.GeoDataFrame(df_bike_rides, geometry='geometry', crs='EPSG:2263')


# generate route colors by month


# turn date column into datetime
df_bike_rides_gdf['Date'] = pd.to_datetime(df_bike_rides_gdf['Date'])

# get month name from date
df_bike_rides_gdf['Month'] = df_bike_rides_gdf['Date'].apply(lambda date: calendar.month_name[date.month])

# get unique months, in order of first appearance in the data
months = df_bike_rides_gdf['Month'].unique()

# generate colors for each month by sampling the 'plasma' colormap at evenly
# spaced positions. pyplot.get_cmap is used because matplotlib.cm.get_cmap
# (plt.cm.get_cmap) was deprecated in Matplotlib 3.7.
cmap = plt.get_cmap('plasma')
colors = [cmap(position / len(months)) for position in range(len(months))]

# make dictionary of month to color (RGBA tuple)
month_to_color = dict(zip(months, colors))

# map month to color
df_bike_rides_gdf['color'] = df_bike_rides_gdf.apply(lambda ride: month_to_color[ride['Month']], axis=1)


/var/folders/rz/q4x269y17917jm35bvx33y6m0000gn/T/ipykernel_6716/2337082980.py:17: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
  cmap = plt.cm.get_cmap('plasma')
# Route line widths scale with how often each (start, end) station pair was ridden.
# Count the rows in every (start station, end station) group.
df_bike_rides_gdf_grouped = (
    df_bike_rides_gdf
    .groupby(['Start Location', 'End Location'])
    .agg('count')
    .reset_index()
)

# Attach the per-route count to each ride. Both frames have a 'Date' column, so
# the merge yields 'Date_x' (the ride's date) and 'Date_y' (the count); rename
# them back to 'Date' and 'num_rides'.
df_bike_rides_gdf = df_bike_rides_gdf.merge(
    df_bike_rides_gdf_grouped[['Start Location', 'End Location', 'Date']],
    how='left',
    on=['Start Location', 'End Location'],
).rename(columns={'Date_y': 'num_rides', 'Date_x': 'Date'})

# describe num rides
df_bike_rides_gdf['num_rides'].describe()

# add offset to num rides for visual aesthetics (so single rides are still visible)
df_bike_rides_gdf['num_rides'] += 2
fig, ax = plt.subplots(figsize=(15,15))

# zoom in to the extent of the rides. total_bounds is (west, south, east, north);
# the bbox passed to the plot below is ordered (north, south, east, west).
bbox = df_bike_rides_gdf.total_bounds

west, south, east, north = bbox[0], bbox[1], bbox[2], bbox[3]

# pad bbox by PAD (in CRS units), more horizontally than vertically
PAD = 10000
north += PAD/6
south -= PAD/6
east += PAD/4
west -= PAD/4


bbox_corrected = (north, south, east, west)

# add text box on bottom right of plot
text = '2023 Citi Bike Trips'
ax.text(0.925, 0.1, text, transform=ax.transAxes, fontsize=20, color='black', ha='right', va='bottom', bbox=dict(facecolor='white', alpha=1, edgecolor='black', pad=10))


# add legend: one colored patch per month
import matplotlib.patches as mpatches
patches = [mpatches.Patch(color=month_to_color[month], label=month) for month in months]
ax.legend(handles=patches, loc='upper left')

# draw the bike network as a light grey backdrop (nothing is saved or shown
# here; the figure is saved explicitly at the end)
ox.plot.plot_graph(nyc, ax=ax, bbox=bbox_corrected, node_size=0, edge_linewidth=0.5, edge_color='grey', edge_alpha=0.5, show=False, save=False, close=False)

# Convert the RGBA tuples to hex strings: passing a bare RGBA tuple as a
# scatter color triggers Matplotlib's "*c* argument looks like a single
# numeric RGB or RGBA sequence" warning.
route_colors = [mcolors.to_hex(color) for color in df_bike_rides_gdf['color']]

# plot shortest routes with ox; routes, colors and widths are all taken from
# the same frame so they stay aligned row by row
ox.plot.plot_graph_routes(nyc, df_bike_rides_gdf['shortest_path'], route_colors=route_colors, route_linewidths=df_bike_rides_gdf.num_rides.values, route_alpha=0.65, orig_dest_size=50, ax=ax, show=False, save=False, close=False)



# add basemap (this will only work properly if you add an API key, wont put mine here for obvious reasons)
#YOUR_STADIA_API_KEY = ''
#ctx.add_basemap(ax, crs=nyc_gdf.crs.to_string(), source='https://tiles.stadiamaps.com/data/satellite/{z}/{x}/{y}.jpg?api_key='+YOUR_STADIA_API_KEY, alpha=0.9)

# save figure before showing it: with non-inline backends the figure may be
# gone once plt.show() returns, which would write an empty image
fig.savefig('images/citibike_routes.png', dpi=300, bbox_inches='tight', pad_inches=0.0)

plt.show()


/Users/mattfranchi/anaconda3/envs/geospatial/lib/python3.12/site-packages/osmnx/plot.py:309: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
  ax.scatter(x, y, s=orig_dest_size, c=route_color, alpha=route_alpha, edgecolor="none")

png