Skip to content

Commit

Permalink
Fix blue bike storage job (#121)
Browse files Browse the repository at this point in the history
* Fix blue bike permissions with s3

* Update blue bike storage code to be safer

* Cleanup and remove layer

* Add back layer

* We don't need the layer

* Increase memory, lower timeout

* Add back layer, it's not the issue
  • Loading branch information
devinmatte authored Feb 21, 2025
1 parent 3efb4bd commit 5e690df
Show file tree
Hide file tree
Showing 6 changed files with 326 additions and 328 deletions.
5 changes: 3 additions & 2 deletions ingestor/.chalice/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"app_name": "ingestor",
"automatic_layer": true,
"layers": [
"arn:aws:lambda:us-east-1:464622532012:layer:Datadog-Extension:68"
"arn:aws:lambda:us-east-1:464622532012:layer:Datadog-Extension:69"
],
"stages": {
"prod": {
Expand All @@ -30,7 +30,8 @@
},
"bb_store_station_status": {
"iam_policy_file": "policy-bluebikes-store.json",
"lambda_timeout": 90
"lambda_memory_size": 256,
"lambda_timeout": 30
},
"bb_store_station_info": {
"iam_policy_file": "policy-bluebikes-store.json",
Expand Down
17 changes: 4 additions & 13 deletions ingestor/.chalice/policy-bluebikes-calc.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,14 @@
"Resource": "arn:*:logs:*:*:*"
},
{
"Action": [
"s3:PutObject",
"s3:GetObject"
],
"Action": ["s3:PutObject", "s3:GetObject"],
"Effect": "Allow",
"Resource": [
"arn:aws:s3:::tm-bluebikes/*"
]
"Resource": ["arn:aws:s3:::tm-bluebikes/*"]
},
{
"Action": [
"s3:ListBucket"
],
"Action": ["s3:ListBucket"],
"Effect": "Allow",
"Resource": [
"arn:aws:s3:::tm-bluebikes"
]
"Resource": ["arn:aws:s3:::tm-bluebikes"]
}
]
}
13 changes: 7 additions & 6 deletions ingestor/.chalice/policy-bluebikes-store.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
"Resource": "arn:*:logs:*:*:*"
},
{
"Action": [
"s3:PutObject"
],
"Action": ["s3:PutObject", "s3:GetObject"],
"Effect": "Allow",
"Resource": ["arn:aws:s3:::tm-bluebikes/*"]
},
{
"Action": ["s3:ListBucket"],
"Effect": "Allow",
"Resource": [
"arn:aws:s3:::tm-bluebikes/*"
]
"Resource": ["arn:aws:s3:::tm-bluebikes"]
}
]
}
22 changes: 18 additions & 4 deletions ingestor/chalicelib/bluebikes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import sys
from geopy import distance
import json
import numpy as np
Expand All @@ -18,13 +19,26 @@ def get_station_status_key(date, timestamp):
return f"station_status/{date}/{timestamp}/bluebikes.csv"


def store_station_status():
resp = requests.get("https://gbfs.bluebikes.com/gbfs/en/station_status.json")
def get_station_status():
    """Fetch and decode the Bluebikes GBFS station status feed.

    Requests the station_status.json feed with a 15 second timeout, treats
    any HTTP error status as a failure, and decodes the response body as
    JSON.

    Returns:
        The decoded JSON payload of the feed.

    Raises:
        SystemExit: If the request fails, the server answers with an error
            status, or the response body is not valid JSON.
    """
    try:
        resp = requests.get("https://gbfs.bluebikes.com/gbfs/en/station_status.json", timeout=15)
        resp.raise_for_status()
        datajson = json.loads(resp.content)
    # json.loads raises json.JSONDecodeError (a ValueError subclass), which is
    # not a RequestException; catch it too so a malformed body is reported
    # through the same exit path instead of an unhandled traceback.
    except (requests.exceptions.RequestException, ValueError) as e:
        sys.exit(f"Error fetching station status: {e}")

    return datajson

datajson = json.loads(resp.content)

def store_station_status():
datajson = get_station_status()

timestamp = datajson.get("last_updated")
df = pd.DataFrame.from_records(datajson.get("data").get("stations"))
stations = datajson.get("data", {}).get("stations", [])
if not stations:
print("No stations found in the response.")
return
df = pd.DataFrame.from_records(stations)
df["datetimepulled"] = timestamp

date = datetime.datetime.fromtimestamp(timestamp, TZ).date()
Expand Down
Loading

0 comments on commit 5e690df

Please sign in to comment.