Skip to content

Commit

Permalink
#802 implement a get_readme_docmd, standard html tag
Browse files Browse the repository at this point in the history
  • Loading branch information
gabrielwol committed Dec 21, 2023
1 parent 9b24aea commit bf62b70
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 13 deletions.
7 changes: 7 additions & 0 deletions dags/dag_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,10 @@ def task_fail_slack_alert(
proxy=proxy,
)
return failed_alert.execute(context=context)

def get_readme_docmd(readme_path, dag_name):
    """Extract a DAG's documentation section from a markdown readme.

    The section is delimited in the readme by a pair of identical HTML
    comment markers, each on its own line, of the form
    ``<!-- {dag_name}_doc_md -->``. Everything between the line after the
    first marker and the line before the last matching marker is returned.

    Args:
        readme_path: Path to the readme file to read.
        dag_name: Name used to build the ``<!-- {dag_name}_doc_md -->`` marker.

    Returns:
        The text between the two markers, without the markers themselves
        and without the newlines adjacent to them.

    Raises:
        IndexError: If no marker-delimited section for ``dag_name`` exists.
        OSError: If the readme cannot be opened.
    """
    import re

    # Context manager so the file handle is closed deterministically.
    with open(readme_path, 'r', encoding='utf-8') as readme:
        contents = readme.read()

    # Escape the marker: DAG names may contain regex metacharacters
    # (e.g. '.'), which must match literally rather than as wildcards.
    doc_md_key = re.escape('<!-- ' + dag_name + '_doc_md -->')
    # Raw strings avoid invalid-escape warnings for \s and \S. The match is
    # deliberately greedy, so it extends to the last matching marker.
    doc_md_regex = '(?<=' + doc_md_key + r'\n)[\s\S]+(?=\n' + doc_md_key + ')'

    match = re.search(doc_md_regex, contents)
    if match is None:
        # Same exception type as indexing an empty findall() result, but
        # with a message that says what was being looked for and where.
        raise IndexError(
            "No '<!-- " + dag_name + "_doc_md -->' section found in "
            + str(readme_path)
        )
    return match.group(0)
9 changes: 3 additions & 6 deletions dags/vds_pull_vdsdata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import sys
import re
from airflow.decorators import dag, task_group, task
from datetime import datetime, timedelta
from airflow.providers.postgres.hooks.postgres import PostgresHook
Expand All @@ -18,14 +17,12 @@
from volumes.vds.py.vds_functions import (
pull_raw_vdsdata, pull_detector_inventory, pull_entity_locations
)
from dags.dag_functions import task_fail_slack_alert
from dags.dag_functions import task_fail_slack_alert, get_readme_docmd
from dags.custom_operators import SQLCheckOperatorWithReturnValue
from dags.common_tasks import check_jan_1st

doc_md_path = os.path.join(repo_path, 'volumes/vds/readme.md')
contents = open(doc_md_path, 'r').read()
doc_md_regex = '### vds_pull_vdsdata DAG \n[\s\S]+(?=\n#{3})'
DOC_MD = re.findall(doc_md_regex, contents)[0]
README_PATH = os.path.join(repo_path, 'volumes/vds/readme.md')
DOC_MD = get_readme_docmd(README_PATH, DAG_NAME)

default_args = {
'owner': ','.join(DAG_OWNERS),
Expand Down
9 changes: 3 additions & 6 deletions dags/vds_pull_vdsvehicledata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import sys
import re
from airflow.decorators import dag, task_group, task
from datetime import datetime, timedelta
from airflow.providers.postgres.hooks.postgres import PostgresHook
Expand All @@ -16,14 +15,12 @@
sys.path.insert(0, repo_path)

from volumes.vds.py.vds_functions import pull_raw_vdsvehicledata
from dags.dag_functions import task_fail_slack_alert
from dags.dag_functions import task_fail_slack_alert, get_readme_docmd
from dags.custom_operators import SQLCheckOperatorWithReturnValue
from dags.common_tasks import check_jan_1st

doc_md_path = os.path.join(repo_path, 'volumes/vds/readme.md')
contents = open(doc_md_path, 'r').read()
doc_md_regex = '### vds_pull_vdsvehicledata DAG \n[\s\S]+(?=\n#{1,3} )'
DOC_MD = re.findall(doc_md_regex, contents)[0]
README_PATH = os.path.join(repo_path, 'volumes/vds/readme.md')
DOC_MD = get_readme_docmd(README_PATH, DAG_NAME)

default_args = {
'owner': ','.join(DAG_OWNERS),
Expand Down
5 changes: 4 additions & 1 deletion volumes/vds/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ Row count: 189
VDS data is pulled daily at 4AM from the ITS Central database by the Airflow DAGs described below.
The DAGs need to be run on-prem to access the ITSC database and are hosted for now on Morbius.

<!-- vds_pull_vdsdata_doc_md -->
### vds_pull_vdsdata DAG
<div style="width: 75%;">

Expand Down Expand Up @@ -439,8 +440,9 @@ A daily DAG to pull [VDS data](https://github.com/CityofToronto/bdit_data-source

**`data_checks`**
- `check_rows_vdsdata_div2` runs a row count check on `vds.counts_15min_div2` to check the row count is >= 0.7 * the 60 day average lookback row count. A slack alert is sent if the check fails.
<!-- vds_pull_vdsdata_doc_md -->


<!-- vds_pull_vdsvehicledata_doc_md -->
### vds_pull_vdsvehicledata DAG
<div style="width: 75%;">

Expand Down Expand Up @@ -468,6 +470,7 @@ A daily DAG to pull [VDS data](https://github.com/CityofToronto/bdit_data-source

**`data_checks`**
- `check_rows_veh_speeds` runs a row count check on `vds.veh_speeds_15min` to check the row count is >= 0.7 * the 60 day average lookback row count. A slack alert is sent if the check fails.
<!-- vds_pull_vdsvehicledata_doc_md -->

## Data Ops: something went wrong predictably, how do I fix it?
**Need to retry a task?**
Expand Down

0 comments on commit bf62b70

Please sign in to comment.