Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/next'
Browse files Browse the repository at this point in the history
  • Loading branch information
ynaim94-harrys committed Mar 14, 2022
2 parents 5629058 + 8795452 commit 250083f
Show file tree
Hide file tree
Showing 15 changed files with 71 additions and 47 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codeql-analysis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3

- name: Initialize CodeQL
uses: github/codeql-action/init@v1
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ jobs:
apply-labels:
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v3
- uses: actions/labeler@v4
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
8 changes: 4 additions & 4 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ jobs:

steps:
- name: Check out code
uses: actions/checkout@v2
uses: actions/checkout@v3
- name: Set up Python 3.7
uses: actions/setup-python@v2
uses: actions/setup-python@v3
with:
python-version: 3.7
- name: Cache pip
Expand All @@ -45,9 +45,9 @@ jobs:

steps:
- name: Check out code
uses: actions/checkout@v2
uses: actions/checkout@v3
- name: Set up Python 3.7
uses: actions/setup-python@v2
uses: actions/setup-python@v3
with:
python-version: 3.7
- name: Cache pip
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish-latest-remote.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:

steps:
- name: Check out the repo
uses: actions/checkout@v2
uses: actions/checkout@v3

- name: Log in to the Container registry
uses: docker/login-action@v1
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:

steps:
- name: Check out the repo
uses: actions/checkout@v2
uses: actions/checkout@v3

- name: Log in to the Container registry
uses: docker/login-action@v1
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out code
uses: actions/checkout@v2
uses: actions/checkout@v3
- name: Set up Python 3.7
uses: actions/setup-python@v2
uses: actions/setup-python@v3
with:
python-version: 3.7
- name: Cache pip
Expand Down
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ repos:
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: 21.12b0
rev: 22.1.0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
Expand All @@ -56,9 +56,9 @@ repos:
hooks:
- id: mypy
additional_dependencies:
- "mypy-boto3~=1.20"
- "types-PyYAML==6.0.3"
- "types-setuptools==57.4.7"
- "mypy-boto3~=1.21"
- "types-PyYAML==6.0.4"
- "types-setuptools==57.4.9"
- "types-simplejson==3.17.3"
- "types-tabulate==0.8.5"
- "types-termcolor==1.1.3"
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
author = "Data Engineering at Harry's"

# TODO(tom): Extract from setup.py
version = "1.58"
version = "1.59"

# -- General configuration ---------------------------------------------------

Expand Down
6 changes: 3 additions & 3 deletions python/etl/extract/database_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def options_info(self) -> List[str]:

def use_sampling_with_table(self, size: int) -> bool:
"""Return True iff option `--use-sampling` appeared and table is large enough (> 100MB)."""
return self.use_sampling and (size > 100 * 1024 ** 2)
return self.use_sampling and (size > 100 * 1024**2)

def select_min_partition_size(self, size: int) -> int:
"""
Expand All @@ -49,8 +49,8 @@ def select_min_partition_size(self, size: int) -> int:
* w/o sampling: 10MB
"""
if self.use_sampling_with_table(size):
return 100 * 1024 ** 2
return 10 * 1024 ** 2
return 100 * 1024**2
return 10 * 1024**2

def maximize_partitions(self, table_size: int) -> int:
"""
Expand Down
23 changes: 11 additions & 12 deletions python/etl/templates/text/ondemand_rebuild_pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
"name": "Arthur Load (EC2)",
"type": "ShellCommandActivity",
"parent": { "ref": "ArthurCommandParent" },
"command": "/tmp/redshift_etl/venv/bin/arthur.py --config /tmp/redshift_etl/config/ load --prolix --prefix ${object_store.s3.prefix} --concurrent-extract",
"command": "/tmp/redshift_etl/venv/bin/arthur.py --config /tmp/redshift_etl/config/ load --prolix --prefix ${object_store.s3.prefix} --concurrent-extract #{myExtraLoadFlags}",
"dependsOn": { "ref": "ArthurTerminateSessions" }
},
{
Expand All @@ -144,23 +144,14 @@
"command": "bash /tmp/redshift_etl/bin/sync_env.sh -y ${object_store.s3.bucket_name} ${object_store.s3.prefix} ${object_store.s3.prefix}/current",
"dependsOn": { "ref": "ArthurLoad" }
},
{
"id": "ArthurUnload",
"name": "Arthur Unload (EC2)",
"type": "ShellCommandActivity",
"parent": {"ref": "ArthurCommandParent"},
"command": "/tmp/redshift_etl/venv/bin/arthur.py --config /tmp/redshift_etl/config/ unload --keep-going --prolix --prefix ${object_store.s3.prefix}",
"dependsOn": {"ref": "ArthurLoad"}
},
{
"id": "SendHealthCheckAfterEtl",
"name": "Send Health Check After ETL (EC2)",
"type": "ShellCommandActivity",
"parent": { "ref": "ShellCommandParent" },
"command": "bash /tmp/redshift_etl/bin/send_health_check.sh",
"dependsOn": [
{ "ref": "PublishAndBackup" },
{ "ref": "ArthurUnload" }
{ "ref": "PublishAndBackup" }
],
"onSuccess": { "ref": "SuccessNotification" },
"onFail": [
Expand All @@ -177,9 +168,17 @@
"description": "How many hours to allow the pipeline to run before terminating it",
"watermark": "6",
"helpText": "How long can the pipeline run?"
},
{
"id": "myExtraLoadFlags",
"type": "String",
"optional": "true",
"description": "Extra flags to add to the load command",
"helpText": "Extra flags to add to the load command, such as skipping the use of staging"
}
],
"values": {
"myTimeout": "6"
"myTimeout": "6",
"myExtraLoadFlags": ""
}
}
33 changes: 29 additions & 4 deletions python/scripts/install_ondemand_rebuild_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@
CURRENT_TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S")
DEFAULT_TIMEOUT=6

if [[ $# -lt 1 || $# -gt 2 || "$1" = "-h" ]]; then
show_usage_and_exit() {
cat <<USAGE
Rebuild ETL to extract, load (including transforms), and unload data.
Usage: $(basename "$0") <environment> [timeout]
Usage: $(basename "$0") <environment> [timeout] [myExtraLoadFlags]
Optional timeout should be the number of hours pipeline is allowed to run. Defaults to $DEFAULT_TIMEOUT.
Optional myExtraLoadFlags (any arguments starting with "-") are passed to the rebuild load command.
USAGE
exit 0
fi
}

set -o errexit -o nounset

Expand All @@ -25,10 +26,33 @@ if [[ ! -d "$DEFAULT_CONFIG" ]]; then
exit 1
fi

POSITIONAL_ARGS=()
EXTRA_LOAD_FLAGS=()

while [[ $# -gt 0 ]]; do
case $1 in
-h)
show_usage_and_exit
;;
-*)
EXTRA_LOAD_FLAGS+=("$1") # save flag
shift # past argument
;;
*)
POSITIONAL_ARGS+=("$1") # save positional arg
shift # past argument
;;
esac
done

set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters

PROJ_BUCKET=$(arthur.py show_value object_store.s3.bucket_name)
PROJ_ENVIRONMENT="$1"
TIMEOUT="${2:-$DEFAULT_TIMEOUT}"

EXTRA_LOAD_FLAGS_STR="${EXTRA_LOAD_FLAGS[*]:-""}"

# Verify that this bucket/environment pair is set up on S3
BOOTSTRAP="s3://$PROJ_BUCKET/$PROJ_ENVIRONMENT/bin/bootstrap.sh"
if ! aws s3 ls "$BOOTSTRAP" > /dev/null; then
Expand Down Expand Up @@ -72,10 +96,11 @@ aws datapipeline put-pipeline-definition \
--pipeline-definition "file://$PIPELINE_DEFINITION_FILE" \
--parameter-values \
myTimeout="$TIMEOUT" \
myExtraLoadFlags="${EXTRA_LOAD_FLAGS_STR}" \
--pipeline-id "$PIPELINE_ID"

set +o xtrace
echo
echo "Your On-Demand Rebuild Pipeline ('$PIPELINE_ID') has been created!"
echo "You can start the pipeline by running the following command:"
echo " aws datapipeline activate-pipeline --pipeline-id "$PIPELINE_ID""
echo " aws datapipeline activate-pipeline --pipeline-id \"$PIPELINE_ID\""
2 changes: 1 addition & 1 deletion requirements-docs.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
myst-parser~=0.16
Sphinx~=4.3
sphinx-autodoc-typehints~=1.12
sphinx-book-theme~=0.1
sphinx-book-theme~=0.2
10 changes: 5 additions & 5 deletions requirements-linters.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# NOTE When changing versions here, be sure to keep .pre-commit-config.yaml consistent!
black==21.12b0
black==22.1.0
flake8==4.0.1
flake8-bugbear==22.1.11
flake8-comprehensions==3.7.0
flake8-docstrings==1.6.0
flake8-fixme==1.1.1
isort==5.10.1
mypy==0.931
mypy-boto3~=1.20
mypy-boto3~=1.21
mypy-extensions==0.4.3
pre-commit==2.16.0
pre-commit==2.17.0
pyupgrade==2.31.0
types-PyYAML==6.0.3
types-setuptools==57.4.7
types-PyYAML==6.0.4
types-setuptools==57.4.9
types-simplejson==3.17.3
types-tabulate==0.8.5
types-termcolor==1.1.3
12 changes: 6 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# This is the set of packages needed to run the code. All versions are pinned.
arrow==1.2.1
boto3==1.20.33
botocore~=1.23
arrow==1.2.2
boto3==1.21.13
botocore~=1.24
funcy==1.17
jmespath==0.10.0
jsonschema==3.2.0
pgpasslib==1.1.0
psycopg2-binary==2.9.2
psycopg2-binary==2.9.3
PyYAML==6.0
simplejson==3.17.6
tabulate==0.8.9
termcolor==1.1.0
tqdm==4.62.3
watchtower==2.1.1
tqdm==4.63.0
watchtower==3.0.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import find_packages, setup

ARTHUR_VERSION = "1.59.2"
ARTHUR_VERSION = "1.60.0"


setup(
Expand Down

0 comments on commit 250083f

Please sign in to comment.