#!/usr/bin/python2.7
# Author: Ben Smithgall
import requests
import math
import operator
import itertools
import networkx as nx
from cluster import KMeansClustering
STATIONS_ENDPOINT = 'http://citibikenyc.com/stations/json'
DIST_ENDPOINT = 'http://appservices.citibikenyc.com/data2/stations.php'
def prep_stations(url):
"""Returns a list where each element of that list is a station
retreived from the endpoint passed in above"""
stations = []
_stations = requests.get(url).json()
for _station in _stations['stationBeanList']:
if _station['statusKey'] == 1:
stations.append([_station['stationName'], _station['id'],
_station['availableDocks'], _station['totalDocks'],
_station['latitude'], _station['longitude']])
return stations
def cluster_stations(stations, empty='empty'):
"""Uses the cluster library to perform kmeans geographical kmeans
clustering on the input stations list. Expects a list of that
format returned by prep_stations, and returns a list similar
to the input list with the cluster number of each element added
"""
if empty == 'empty':
tocluster = [i for i in stations if (i[3] - i[2])/float(i[3]) < .2]
else:
tocluster = [i for i in stations if (i[2])/float(i[3]) < .2]
cl = KMeansClustering([(i[4], i[5]) for i in tocluster])
clusters = cl.getclusters(4)
# Note that this returns a list of lists of lat/long tuples. We're
# going to have to re-associate them back to the rest of the stations
clustered = []
for ix, i in enumerate(clusters):
for j in i:
for k in tocluster:
if (j[0], j[1]) == (k[4], k[5]):
clustered.append([k[0], k[1], k[2],
k[3], k[4], k[5], ix+1])
return clustered
def haversine(long1, lat1, long2, lat2):
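    """Great-circle distance in km between two points given in radians
    (use haversine_distance for coordinates in degrees)."""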
R = 6371 # Earth mean radius (in km)
delta_long = long2 - long1
delta_lat = lat2 - lat1
a = math.sin(delta_lat/2)**2 + math.cos(lat1) * \
math.cos(lat2) * math.sin(delta_long/2)**2
c = 2 * math.asin(min(1,math.sqrt(a)))
d = R * c
return d
def haversine_distance(long1, lat1, long2, lat2):
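    """Wrapper around haversine that converts degrees to radians."""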
return haversine(math.radians(long1), math.radians(lat1),
math.radians(long2), math.radians(lat2))
def get_graph_breakdown(clustered):
"""Take clustered, and explode it outward into a list
of edges. that can be added to a networkx as graph using
add_weighted_edges_from"""
edges, output = [], []
combs = itertools.combinations(clustered, 2)
for i in combs:
edge1 = i[0][0]
edge2 = i[1][0]
dist = haversine_distance(i[0][5], i[0][4],
i[1][5], i[1][4])
if dist < .8:
edges.append((edge1, edge2, dist))
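    # Build a graph from the short-range (< 0.8 km) edges and score each
    # station by its degree centrality.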
G = nx.DiGraph()
G.add_weighted_edges_from(edges)
degcent = nx.degree_centrality(G).items()
for i in clustered:
for j in degcent:
if i[0] == j[0]:
output.append([i[0], i[1], i[2],
i[3], i[4], i[5], i[6], j[1]])
return output
def make_recs(graph_output, dist_url):
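    """Walks stations in descending order of degree centrality and keeps
    the top five, skipping any station that is listed as a nearby
    neighbor of a station already recommended."""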
recs, nearby = [], []
_dists = requests.get(dist_url).json()
nearbyLookup = dict((i['id'], [j['id'] for j in i['nearbyStations']]) for i in _dists['results'])
for i in sorted(graph_output, key=operator.itemgetter(7), reverse=True):
        if i[1] not in nearby:
            recs.append(i)
            # extend (not append) so the membership check above sees the
            # individual nearby station ids
            nearby.extend(nearbyLookup[i[1]])
if len(recs) > 4:
break
return recs
def get_recs(empty):
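    """Clusters the stations, then builds recommendations for each of
    the four clusters and concatenates them."""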
output = []
    stations_tocluster = prep_stations(STATIONS_ENDPOINT)
    stations_cluster = cluster_stations(stations_tocluster, empty)
for i in xrange(1, 5):
cluster = [j for j in stations_cluster if j[-1] == i]
output.extend(make_recs(get_graph_breakdown(cluster), DIST_ENDPOINT))
return output
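

if __name__ == '__main__':
    # Example usage (a minimal sketch): fetch recommendations for
    # nearly-empty stations and print each station name with its
    # degree-centrality score. Assumes the Citi Bike endpoints above are
    # still reachable and the cluster library is installed.
    for rec in get_recs('empty'):
        print '%s (centrality: %.3f)' % (rec[0], rec[7])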