From 53fbe362cf978fe1d84c7e6faa21abe32015a4fe Mon Sep 17 00:00:00 2001 From: Gabe Wolofsky <80077912+gabrielwol@users.noreply.github.com> Date: Thu, 13 Feb 2025 11:35:54 -0500 Subject: [PATCH 1/4] #1149 add geojson export to dag --- dags/ecocounter_open_data.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/dags/ecocounter_open_data.py b/dags/ecocounter_open_data.py index c8631a68a..ce5fd07f1 100644 --- a/dags/ecocounter_open_data.py +++ b/dags/ecocounter_open_data.py @@ -191,7 +191,26 @@ def download_locations_open_data()->str: FROM open_data.cycling_permanent_counts_locations ORDER BY location_dir_id;" \ --csv -o "$EXPORT_PATH/cycling_permanent_counts_locations.csv"''' - + + @task.bash( + env = { + 'HOST': '{{ conn.ecocounter_bot.host }}', + 'LOGIN': '{{ conn.ecocounter_bot.login }}', + 'PGPASSWORD': '{{ conn.ecocounter_bot.password }}', + 'EXPORT_PATH': EXPORT_PATH + } + ) + def download_locations_open_data_geojson()->str: + return '''/usr/bin/ogr2ogr -f "GeoJSON" \ + "$EXPORT_PATH/cycling_permanent_counts_locations.geojson" \ + PG:"host=$HOST dbname=bigdata" \ + -sql "SELECT location_dir_id, location_name, direction, linear_name_full, side_street, + longitude, latitude, centreline_id, bin_size, latest_calibration_study, + first_active, last_active, date_decommissioned, technology + FROM open_data.cycling_permanent_counts_locations + ORDER BY location_dir_id;" \ + -nln cycling_permanent_counts_locations''' + @task.bash() def output_readme()->str: source='/home/airflow/data_scripts/volumes/open_data/sql/cycling_permanent_counts_readme.md' @@ -220,6 +239,7 @@ def status_message(ds = None, **context): update_locations >> [ insert_and_download_data.expand(yr = yrs), download_locations_open_data(), + download_locations_open_data_geojson(), output_readme() ] >> status_message() From 5eeea9f8801f917eead403ff4ad6be06c57f348f Mon Sep 17 00:00:00 2001 From: Gabe Wolofsky <80077912+gabrielwol@users.noreply.github.com> Date: Tue, 18 Feb 2025 15:20:27 -0500 Subject: [PATCH 2/4] #1149 switch to formatting geojson in postgres --- dags/ecocounter_open_data.py | 52 +++++-------------- ...ing_permanent_counts_locations_geojson.sql | 37 +++++++++++++ 2 files changed, 50 insertions(+), 39 deletions(-) create mode 100644 volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql diff --git a/dags/ecocounter_open_data.py b/dags/ecocounter_open_data.py index ce5fd07f1..89d97d3d0 100644 --- a/dags/ecocounter_open_data.py +++ b/dags/ecocounter_open_data.py @@ -35,6 +35,12 @@ README_PATH = os.path.join(repo_path, 'volumes/ecocounter/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) EXPORT_PATH = '/home/airflow/open_data/permanent-bike-counters' #'/data/open_data/permanent-bike-counters' +BASH_ENV = { + 'HOST': '{{ conn.ecocounter_bot.host }}', + 'LOGIN': '{{ conn.ecocounter_bot.login }}', + 'PGPASSWORD': '{{ conn.ecocounter_bot.password }}', + 'EXPORT_PATH': EXPORT_PATH +} default_args = { 'owner': ','.join(DAG_OWNERS), @@ -132,14 +138,7 @@ def insert_15min(yr): ) return t.execute(context=context) - @task.bash( - env = { - 'HOST': '{{ conn.ecocounter_bot.host }}', - 'LOGIN': '{{ conn.ecocounter_bot.login }}', - 'PGPASSWORD': '{{ conn.ecocounter_bot.password }}', - 'EXPORT_PATH': EXPORT_PATH - } - ) + @task.bash(env = BASH_ENV) def download_daily_open_data()->str: return '''/usr/bin/psql -h $HOST -U $LOGIN -d bigdata -c \ "SELECT @@ -151,12 +150,7 @@ def download_daily_open_data()->str: --csv -o "$EXPORT_PATH/cycling_permanent_counts_daily.csv"''' @task.bash( - env = { - 'HOST': '{{ conn.ecocounter_bot.host }}', - 'LOGIN': '{{ conn.ecocounter_bot.login }}', - 'PGPASSWORD': '{{ conn.ecocounter_bot.password }}', - 'EXPORT_PATH': EXPORT_PATH - }, + env = BASH_ENV, map_index_template="{{ yr }}" ) def download_15min_open_data(yr)->str: @@ -175,14 +169,7 @@ def download_15min_open_data(yr)->str: insert_daily(yr) >> download_daily_open_data() insert_15min(yr) >> download_15min_open_data(yr) - @task.bash( - env = { - 'HOST': '{{ conn.ecocounter_bot.host }}', - 'LOGIN': '{{ conn.ecocounter_bot.login }}', - 'PGPASSWORD': '{{ conn.ecocounter_bot.password }}', - 'EXPORT_PATH': EXPORT_PATH - } - ) + @task.bash(env = BASH_ENV) def download_locations_open_data()->str: return '''/usr/bin/psql -h $HOST -U $LOGIN -d bigdata -c \ "SELECT location_dir_id, location_name, direction, linear_name_full, side_street, @@ -192,24 +179,11 @@ def download_locations_open_data()->str: ORDER BY location_dir_id;" \ --csv -o "$EXPORT_PATH/cycling_permanent_counts_locations.csv"''' - @task.bash( - env = { - 'HOST': '{{ conn.ecocounter_bot.host }}', - 'LOGIN': '{{ conn.ecocounter_bot.login }}', - 'PGPASSWORD': '{{ conn.ecocounter_bot.password }}', - 'EXPORT_PATH': EXPORT_PATH - } - ) + @task.bash(env = BASH_ENV) def download_locations_open_data_geojson()->str: - return '''/usr/bin/ogr2ogr -f "GeoJSON" \ - "$EXPORT_PATH/cycling_permanent_counts_locations.geojson" \ - PG:"host=$HOST dbname=bigdata" \ - -sql "SELECT location_dir_id, location_name, direction, linear_name_full, side_street, - longitude, latitude, centreline_id, bin_size, latest_calibration_study, - first_active, last_active, date_decommissioned, technology - FROM open_data.cycling_permanent_counts_locations - ORDER BY location_dir_id;" \ - -nln cycling_permanent_counts_locations''' + return '''/usr/bin/psql -h $HOST -U $LOGIN -d bigdata -c \ + "SELECT featurecollection FROM open_data.cycling_permanent_counts_locations_geojson;" \ + --tuples-only -o "$EXPORT_PATH/cycling_permanent_counts_locations.geojson"''' @task.bash() def output_readme()->str: diff --git a/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql b/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql new file mode 100644 index 000000000..9dd86266f --- /dev/null +++ b/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql @@ -0,0 +1,37 @@ +CREATE OR REPLACE VIEW open_data.cycling_permanent_counts_locations_geojson AS +WITH features AS ( + SELECT json_build_object( + 'type', 'Feature', + 'id', location_dir_id, + 'geometry', st_asgeojson(st_setsrid(st_makepoint(longitude, latitude), 4326))::jsonb, + 'attributes', json_build_object( + 'location_dir_id', location_dir_id, + 'location_name', location_name, + 'direction', direction, + 'linear_name_full', linear_name_full, + 'side_street', side_street, + 'centreline_id', centreline_id, + 'bin_size', bin_size, + 'latest_calibration_study', latest_calibration_study, + 'first_active', first_active, + 'last_active', last_active, + 'date_decommissioned', date_decommissioned, + 'technology', technology + ) + ) AS features + FROM open_data.cycling_permanent_counts_locations +) + +--aggregate features into FeatureCollection +SELECT json_build_object( + 'type', + 'FeatureCollection', + 'features', + json_agg(features.features) +) AS featurecollection +FROM features; + +ALTER VIEW open_data.cycling_permanent_counts_locations_geojson OWNER TO od_admins; + +GRANT SELECT ON open_data.cycling_permanent_counts_locations_geojson TO ecocounter_bot; +GRANT SELECT ON open_data.cycling_permanent_counts_locations_geojson TO bdit_humans; From a75e88508e858609d7473d88d177e4e23b78f9ca Mon Sep 17 00:00:00 2001 From: Gabe Wolofsky <80077912+gabrielwol@users.noreply.github.com> Date: Tue, 18 Feb 2025 15:32:00 -0500 Subject: [PATCH 3/4] #1149 sqlfluff --- ...ing_permanent_counts_locations_geojson.sql | 52 ++++++++++--------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql b/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql index 9dd86266f..2e43bed36 100644 --- a/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql +++ b/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql @@ -1,34 +1,36 @@ CREATE OR REPLACE VIEW open_data.cycling_permanent_counts_locations_geojson AS WITH features AS ( - SELECT json_build_object( - 'type', 'Feature', - 'id', location_dir_id, - 'geometry', st_asgeojson(st_setsrid(st_makepoint(longitude, latitude), 4326))::jsonb, - 'attributes', json_build_object( - 'location_dir_id', location_dir_id, - 'location_name', location_name, - 'direction', direction, - 'linear_name_full', linear_name_full, - 'side_street', side_street, - 'centreline_id', centreline_id, - 'bin_size', bin_size, - 'latest_calibration_study', latest_calibration_study, - 'first_active', first_active, - 'last_active', last_active, - 'date_decommissioned', date_decommissioned, - 'technology', technology - ) - ) AS features + SELECT + json_build_object( + 'type', 'Feature', + 'id', location_dir_id, + 'geometry', st_asgeojson(st_setsrid(st_makepoint(longitude, latitude), 4326))::jsonb, + 'attributes', json_build_object( + 'location_dir_id', location_dir_id, + 'location_name', location_name, + 'direction', direction, + 'linear_name_full', linear_name_full, + 'side_street', side_street, + 'centreline_id', centreline_id, + 'bin_size', bin_size, + 'latest_calibration_study', latest_calibration_study, + 'first_active', first_active, + 'last_active', last_active, + 'date_decommissioned', date_decommissioned, + 'technology', technology + ) + ) AS features FROM open_data.cycling_permanent_counts_locations ) --aggregate features into FeatureCollection -SELECT json_build_object( - 'type', - 'FeatureCollection', - 'features', - json_agg(features.features) -) AS featurecollection +SELECT + json_build_object( + 'type', + 'FeatureCollection', + 'features', + json_agg(features.features) + ) AS featurecollection FROM features; ALTER VIEW open_data.cycling_permanent_counts_locations_geojson OWNER TO od_admins; From 154bb73e022fba81a58d0382e88600b48262e1f7 Mon Sep 17 00:00:00 2001 From: Gabe Wolofsky <80077912+gabrielwol@users.noreply.github.com> Date: Wed, 19 Feb 2025 09:16:28 -0500 Subject: [PATCH 4/4] #1149 attributes -> properties --- .../create-view-cycling_permanent_counts_locations_geojson.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql b/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql index 2e43bed36..c40a9e492 100644 --- a/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql +++ b/volumes/open_data/sql/create-view-cycling_permanent_counts_locations_geojson.sql @@ -5,7 +5,7 @@ WITH features AS ( 'type', 'Feature', 'id', location_dir_id, 'geometry', st_asgeojson(st_setsrid(st_makepoint(longitude, latitude), 4326))::jsonb, - 'attributes', json_build_object( + 'properties', json_build_object( 'location_dir_id', location_dir_id, 'location_name', location_name, 'direction', direction,