Skip to content

Commit

Permalink
#1139 add 5 additional feature class
Browse files Browse the repository at this point in the history
  • Loading branch information
chmnata committed Feb 24, 2025
1 parent b2e4c3b commit e3010aa
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 7 deletions.
9 changes: 9 additions & 0 deletions gis/centreline/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ Segments are stored in the partitioned table `gis_core.centreline`. The latest v

Currently we are including only the following types:

> [!IMPORTANT]
> On 2025-02-24, we started to include the following five additional feature types: Busway, Trails, Access Road, Other Ramp, and Laneway, in order to ensure consistency with MOVE.
> On 2024-02-19, we started to include `Other`.
* 'Expressway'
* 'Expressway Ramp'
* 'Major Arterial'
Expand All @@ -38,6 +42,11 @@ Currently we are including only the following types:
* 'Local'
* 'Pending'
* 'Other' (version >= `2024-02-19`)
* 'Busway' (version >= `2025-02-24`)
* 'Access Road' (version >= `2025-02-24`)
* 'Trails' (version >= `2025-02-24`)
* 'Other Ramp' (version >= `2025-02-24`)
* 'Laneway' (version >= `2025-02-24`)

#### Directionality

Expand Down
3 changes: 3 additions & 0 deletions gis/gccview/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ The pipeline consists of two files, `gcc_puller_functions.py` for the functions
- con (used when manually pull): the path to the credential config file. Default is ~/db.cfg
- primary_key (used when pulling an audited table): primary key for this layer, returned from dictionary pk_dict when pulling for the Airflow DAG, set it manually when pulling a layer yourself.
- is_partitioned (Boolean): True if the layer will be inserted as a child table part of a parent table, False if the layer will be neither audited nor partitioned.
- include_additional_layers (Boolean): True if pulling additional layers for centreline.

In the DAG file, the arguments for each layer are stored in dictionaries called "bigdata_layers" and "ptc_layers", in the order above. The DAG will be executed once every 3 months, particularly on the 15th of every March, June, September, and December every year. The DAG will pull either audited table or partitioned table since the "is_partitioned" argument is not stored in dictionaries and are set to default value True.

Expand Down Expand Up @@ -140,6 +141,8 @@ There are 7 inputs that can be entered.

`is_partitioned`: Whether table will be a child table of a parent table or with no feature, specify the option on the command line will set this option to True; while not specifying will give the default False.

`include_additional_layers`: Whether additional layers should be pulled (only applicable for centreline). Specifying the option on the command line sets this option to True; not specifying gives the default False.

Example of pulling the library layer (table with no feature) to the gis schema.


Expand Down
23 changes: 16 additions & 7 deletions gis/gccview/gcc_puller_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def to_time(input):
time = datetime.datetime.fromtimestamp(abs(input)/1000).strftime('%Y-%m-%d %H:%M:%S')
return time

def get_data(mapserver, layer_id, max_number = None, record_max = None):
def get_data(mapserver, layer_id, max_number = None, record_max = None, include_additional_feature=False):
"""
Function to retreive layer data from GCCView rest api
Expand All @@ -302,7 +302,10 @@ def get_data(mapserver, layer_id, max_number = None, record_max = None):
record_max : integer
Number for parameter `resultRecordCount` in the query, indicating the number of rows this query is going to fetch
include_additional_feature : bool
    Boolean flag to include the 5 additional feature codes (Trails, Busway, Laneway, Access Road, and Other Ramp)
Returns
--------
return_json : json
Expand All @@ -312,7 +315,10 @@ def get_data(mapserver, layer_id, max_number = None, record_max = None):
base_url = f"https://insideto-gis.toronto.ca/arcgis/rest/services/{mapserver}/MapServer/{layer_id}/query"
# Exception if the data we want to get is centreline
if mapserver == 'cot_geospatial' and layer_id == 2:
where = "\"FEATURE_CODE_DESC\" IN ('Collector','Collector Ramp','Expressway','Expressway Ramp','Local','Major Arterial','Major Arterial Ramp','Minor Arterial','Minor Arterial Ramp','Pending', 'Other')"
if include_additional_feature:
where = "\"FEATURE_CODE_DESC\" IN ('Collector','Collector Ramp','Expressway','Expressway Ramp','Local','Major Arterial','Major Arterial Ramp','Minor Arterial','Minor Arterial Ramp','Pending', 'Other', 'Trails', 'Busway', 'Laneway', 'Other Ramp', 'Access Road')"
else:
where = "\"FEATURE_CODE_DESC\" IN ('Collector','Collector Ramp','Expressway','Expressway Ramp','Local','Major Arterial','Major Arterial Ramp','Minor Arterial','Minor Arterial Ramp','Pending', 'Other')"
elif mapserver == 'cot_geospatial27' and layer_id == 41:
where = "OBJECTID>0"
else:
Expand Down Expand Up @@ -536,7 +542,7 @@ def update_table(output_table, insert_column, excluded_column, primary_key, sche
return successful_execution
#-------------------------------------------------------------------------------------------------------
# base main function, also compatible with Airflow
def get_layer(mapserver_n, layer_id, schema_name, is_audited, cred = None, con = None, primary_key = None, is_partitioned = True):
def get_layer(mapserver_n, layer_id, schema_name, is_audited, cred = None, con = None, primary_key = None, is_partitioned = True, include_additional_layers = False):
"""
This function calls to the GCCview rest API and inserts the outputs to the output table in the postgres database.
Expand Down Expand Up @@ -590,7 +596,7 @@ def get_layer(mapserver_n, layer_id, schema_name, is_audited, cred = None, con =
LOGGER.error("Non-audited tables do not use the primary key.")
#--------------------------------
#get first data pull (no offset), create tables.
return_json = get_data(mapserver, layer_id)
return_json = get_data(mapserver, layer_id, include_additional_feature=include_additional_layers)
if is_audited:
(insert_column, excluded_column) = create_audited_table(output_table, return_json, schema_name, primary_key, con)
elif is_partitioned:
Expand All @@ -605,7 +611,7 @@ def get_layer(mapserver_n, layer_id, schema_name, is_audited, cred = None, con =
keep_adding = find_limit(return_json) #checks if all records fetched
if keep_adding:
#get next batch using offset (max_number)
return_json = get_data(mapserver, layer_id, max_number = total, record_max = record_count)
return_json = get_data(mapserver, layer_id, max_number = total, record_max = record_count, include_additional_feature = include_additional_layers)
LOGGER.info('%s records from [mapserver: %s, layerID: %d] have been inserted into %s', total, mapserver, layer_id, output_table)

if is_audited:
Expand All @@ -630,7 +636,9 @@ def get_layer(mapserver_n, layer_id, schema_name, is_audited, cred = None, con =
help = 'The path to the credential config file')
@click.option('--is-partitioned', '-p', is_flag=True, show_default=True, default=False,
help = 'Whether the table is supposed to be partitioned (T) or not partitioned (F)')
def manual_get_layer(mapserver, layer_id, schema_name, is_audited, primary_key, con, is_partitioned=True):
@click.option('--include-additional-layers', '-a', is_flag=True, show_default=True, default=False,
help = 'Whether additional layers should be pulled (only applicable for centreline)')
def manual_get_layer(mapserver, layer_id, schema_name, is_audited, primary_key, con, is_partitioned=True, include_additional_layers=False):
"""
This script pulls a GIS layer from GCC servers into the databases of
the Data and Analytics Unit.
Expand All @@ -652,6 +660,7 @@ def manual_get_layer(mapserver, layer_id, schema_name, is_audited, primary_key,
primary_key = primary_key,
con=connection_obj,
is_partitioned = is_partitioned,
include_additional_layers = include_additional_layers
)

if __name__ == '__main__':
Expand Down

0 comments on commit e3010aa

Please sign in to comment.