
Commit

Modified according to PR comments
jcpitre committed Jan 28, 2025
1 parent d4d6521 commit a1b84ee
Showing 4 changed files with 17 additions and 15 deletions.
api/src/shared/common/db_utils.py (4 changes: 2 additions & 2 deletions)
@@ -79,7 +79,7 @@ def get_all_gtfs_feeds_query(
     include_wip: bool = False,
     db_session: Session = None,
 ) -> Query[any]:
-    """Get the DB query to use to retrieve all the GTFS feeds, filtering out the WIP is needed"""
+    """Get the DB query to use to retrieve all the GTFS feeds, filtering out the WIP if needed"""

     feed_query = db_session.query(Gtfsfeed)

@@ -165,7 +165,7 @@ def get_all_gtfs_rt_feeds_query(
     include_wip: bool = False,
     db_session: Session = None,
 ) -> Query:
-    """Get the DB query to use to retrieve all the GTFS rt feeds, filtering out the WIP is needed"""
+    """Get the DB query to use to retrieve all the GTFS rt feeds, filtering out the WIP if needed"""
     feed_query = db_session.query(Gtfsrealtimefeed)

     if not include_wip:
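Note: both helpers follow the same calling pattern. A minimal usage sketch, assuming a session obtained the same way export_csv/src/main.py does below; the Database import path is hypothetical, and any keyword arguments collapsed out of this diff are omitted:

import os
from shared.common.db_utils import get_all_gtfs_feeds_query, get_all_gtfs_rt_feeds_query
from shared.database import Database  # hypothetical import path; adjust to the project's actual module

db = Database(database_url=os.getenv("FEEDS_DATABASE_URL"))
with db.start_db_session() as session:
    # include_wip=False filters out work-in-progress feeds, as the corrected docstrings describe
    gtfs_feeds = get_all_gtfs_feeds_query(include_wip=False, db_session=session).all()
    gtfs_rt_feeds = get_all_gtfs_rt_feeds_query(include_wip=False, db_session=session).all()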
api/src/shared/common/error_handling.py (4 changes: 2 additions & 2 deletions)
@@ -13,8 +13,8 @@

 class InternalHTTPException(Exception):
     """
-    This class is used instead of the HTTPException bevause we don't want to depend on fastapi and have to deploy it.
-    At one point this exception needs to be caught and converted to an HTTPException,
+    This class is used instead of the HTTPException because we don't want to depend on fastapi and have to deploy it.
+    At one point this exception needs to be caught and converted to a fastapi HTTPException,
     """

     def __init__(self, status_code: int, detail: str) -> None:
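Note: a minimal sketch of the conversion the updated docstring describes, performed in a layer that is allowed to depend on fastapi. It assumes the constructor stores status_code and detail as attributes; the wrapper name is illustrative, not part of the commit:

from fastapi import HTTPException

from shared.common.error_handling import InternalHTTPException


def to_http_exception(error: InternalHTTPException) -> HTTPException:
    # Re-raise the framework-agnostic exception as the fastapi equivalent at the API boundary
    return HTTPException(status_code=error.status_code, detail=error.detail)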
functions-python/export_csv/function_config.json (4 changes: 2 additions & 2 deletions)
@@ -13,8 +13,8 @@
     }
   ],
   "ingress_settings": "ALLOW_INTERNAL_AND_GCLB",
-  "max_instance_request_concurrency": 20,
-  "max_instance_count": 10,
+  "max_instance_request_concurrency": 1,
+  "max_instance_count": 1,
   "min_instance_count": 0,
   "available_cpu": 1
 }
functions-python/export_csv/src/main.py (20 changes: 11 additions & 9 deletions)
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 import argparse
+import logging
 import os
 import re

@@ -27,6 +28,7 @@
 from google.cloud import storage
 from geoalchemy2.shape import to_shape

+from helpers.logger import Logger
 from shared.database_gen.sqlacodegen_models import Gtfsfeed, Gtfsrealtimefeed
 from collections import OrderedDict
 from shared.common.db_utils import get_all_gtfs_rt_feeds_query, get_all_gtfs_feeds_query
@@ -82,6 +84,8 @@ def export_csv():
     :param request: HTTP request object
     :return: HTTP response object
     """
+    Logger.init_logger()
+    logging.info("Function Started")
     data_collector = collect_data()
     data_collector.write_csv_to_file(csv_file_path)
     return f"Exported {len(data_collector.rows)} feeds to CSV file {csv_file_path}."
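Note: helpers.logger.Logger is project-specific and not shown in this diff. A rough sketch of what an init_logger() like this typically does in a Cloud Function, assuming the google-cloud-logging client library; the project's actual helper may differ:

import logging

import google.cloud.logging  # assumed dependency


def init_logger() -> None:
    # Attach the Cloud Logging handler to the standard logging module so logging.info() reaches Cloud Logging
    client = google.cloud.logging.Client()
    client.setup_logging()
    logging.getLogger().setLevel(logging.INFO)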
@@ -93,7 +97,7 @@ def collect_data() -> DataCollector:
     :return: A filled DataCollector
     """
     db = Database(database_url=os.getenv("FEEDS_DATABASE_URL"))
-    print(f"Using database {db.database_url}")
+    logging.info(f"Using database {db.database_url}")
     try:
         with db.start_db_session() as session:
             gtfs_feeds_query = get_all_gtfs_feeds_query(
@@ -103,7 +107,7 @@

             gtfs_feeds = gtfs_feeds_query.all()

-            print(f"Retrieved {len(gtfs_feeds)} GTFS feeds.")
+            logging.info(f"Retrieved {len(gtfs_feeds)} GTFS feeds.")

             gtfs_rt_feeds_query = get_all_gtfs_rt_feeds_query(
                 include_wip=False,
@@ -112,29 +116,27 @@

             gtfs_rt_feeds = gtfs_rt_feeds_query.all()

-            print(f"Retrieved {len(gtfs_rt_feeds)} GTFS realtime feeds.")
+            logging.info(f"Retrieved {len(gtfs_rt_feeds)} GTFS realtime feeds.")

             data_collector = DataCollector()

             for feed in gtfs_feeds:
-                # print(f"Processing feed {feed.stable_id}")
                 data = get_feed_csv_data(feed)

                 for key, value in data.items():
                     data_collector.add_data(key, value)
                 data_collector.finalize_row()
-            print(f"Processed {len(gtfs_feeds)} GTFS feeds.")
+            logging.info(f"Processed {len(gtfs_feeds)} GTFS feeds.")

             for feed in gtfs_rt_feeds:
-                # print(f"Processing rt feed {feed.stable_id}")
                 data = get_gtfs_rt_feed_csv_data(feed)
                 for key, value in data.items():
                     data_collector.add_data(key, value)
                 data_collector.finalize_row()
-            print(f"Processed {len(gtfs_rt_feeds)} GTFS realtime feeds.")
+            logging.info(f"Processed {len(gtfs_rt_feeds)} GTFS realtime feeds.")

     except Exception as error:
-        print(f"Error retrieving feeds: {error}")
+        logging.error(f"Error retrieving feeds: {error}")
         raise Exception(f"Error retrieving feeds: {error}")
     data_collector.write_csv_to_file(csv_file_path)
     return data_collector
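Note: DataCollector is not part of this diff. A hypothetical minimal version with the interface used above (add_data, finalize_row, rows, write_csv_to_file), for illustration only; the project's real implementation may differ:

import csv
from collections import OrderedDict


class DataCollector:
    """Accumulates one row per feed and writes the rows out as CSV."""

    def __init__(self):
        self.rows = []
        self._columns = []
        self._current = OrderedDict()

    def add_data(self, key, value):
        # Remember column order from the first time each key is seen
        if key not in self._columns:
            self._columns.append(key)
        self._current[key] = value

    def finalize_row(self):
        self.rows.append(self._current)
        self._current = OrderedDict()

    def write_csv_to_file(self, path):
        with open(path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=self._columns)
            writer.writeheader()
            writer.writerows(self.rows)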
@@ -336,7 +338,7 @@ def upload_file_to_storage(source_file_path, target_path):
     Uploads a file to the GCP bucket
     """
     bucket_name = os.getenv("DATASETS_BUCKET_NAME")
-    print(f"Uploading file to bucket {bucket_name} at path {target_path}")
+    logging.info(f"Uploading file to bucket {bucket_name} at path {target_path}")
     bucket = storage.Client().get_bucket(bucket_name)
     blob = bucket.blob(target_path)
     with open(source_file_path, "rb") as file:
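Note: the tail of upload_file_to_storage is collapsed in this diff. A sketch of how such a helper typically completes with the google-cloud-storage client; the upload call and return value are assumptions, not part of the commit:

import logging
import os

from google.cloud import storage


def upload_file_to_storage(source_file_path, target_path):
    """Uploads a file to the GCP bucket."""
    bucket_name = os.getenv("DATASETS_BUCKET_NAME")
    logging.info(f"Uploading file to bucket {bucket_name} at path {target_path}")
    bucket = storage.Client().get_bucket(bucket_name)
    blob = bucket.blob(target_path)
    with open(source_file_path, "rb") as file:
        blob.upload_from_file(file)  # assumed completion of the collapsed lines
    return blob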
