# .github/workflows/aws.yml
name: List S3 Objects - AWS

on:
  pull_request:

env:
  AWS_REGION: eu-west-1
  AWS_ROLE_ARN: "arn:aws:iam::719197435995:role/DbtSparkTestingActions"
  S3_BUCKET: "dbt-testing-automation"

permissions:
  id-token: write # Required for requesting the OIDC JWT
  contents: read  # Required for actions/checkout
jobs:
  list_s3_objects:
    name: list_s3_objects
    runs-on: ubuntu-latest
    defaults:
      run:
        # Run all `run` steps from the integration_tests sub-directory
        working-directory: ./integration_tests
    strategy:
      fail-fast: false
      matrix:
        dbt_version: ["1.*"]
        warehouse: ["spark_iceberg"]
    steps:
      # Check out first: the job-level working-directory (./integration_tests)
      # does not exist until the repository has been checked out, so `run`
      # steps placed before this one would fail.
      - name: Check out
        uses: actions/checkout@v4

      - name: Configure AWS CLI with OIDC role assumption
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ env.AWS_ROLE_ARN }}
          aws-region: ${{ env.AWS_REGION }}
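      # The action above requests a JWT from GitHub's OIDC provider and
      # exchanges it (via sts:AssumeRoleWithWebIdentity) for temporary
      # credentials scoped to the role in AWS_ROLE_ARN, so no long-lived AWS
      # secrets need to be stored in the repository.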

      # Read the temporary credentials back out of the AWS CLI configuration
      # and publish them to later steps via GITHUB_ENV.
      - name: Extract AWS credentials
        run: |
          echo "AWS_ACCESS_KEY_ID=$(aws configure get aws_access_key_id)" >> $GITHUB_ENV
          echo "AWS_SECRET_ACCESS_KEY=$(aws configure get aws_secret_access_key)" >> $GITHUB_ENV
          echo "AWS_SESSION_TOKEN=$(aws configure get aws_session_token)" >> $GITHUB_ENV

      - name: List objects from S3 bucket
        run: |
          aws s3 ls s3://${{ env.S3_BUCKET }} --recursive

      # The credentials can now be used in subsequent steps like this:
      - name: Example Next Step
        env:
          AWS_ACCESS_KEY_ID: ${{ env.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ env.AWS_SECRET_ACCESS_KEY }}
          AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }}
        run: |
          # Commands run here have access to the temporary credentials
          echo "Credentials are available in this step"

      # Strip the trailing '.*' from DBT_VERSION and replace any remaining '.'
      # with '_', then store the result as SCHEMA_SUFFIX. SCHEMA_SUFFIX lets
      # multiple dbt versions run in parallel without overwriting each other's
      # output tables.
      - name: Set SCHEMA_SUFFIX env
        run: echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
        env:
          DBT_VERSION: ${{ matrix.dbt_version }}
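      # Worked example: DBT_VERSION="1.*"   -> ${DBT_VERSION%.*} = "1"   -> SCHEMA_SUFFIX "1"
      #                 DBT_VERSION="1.5.0" -> ${DBT_VERSION%.*} = "1.5" -> SCHEMA_SUFFIX "1_5"
      # ("1.5.0" is a hypothetical pin; this matrix only uses "1.*")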

      - name: Configure Docker credentials
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_USERNAME }}
          password: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_PASSWORD }}

      # Split the matrix warehouse value into the platform and the
      # platform-specific variant. `cut -s` suppresses output when the value
      # contains no '_' delimiter, leaving WAREHOUSE_SPECIFIC empty.
      - name: Set warehouse variables
        id: set_warehouse
        run: |
          WAREHOUSE_PLATFORM=$(echo ${{ matrix.warehouse }} | cut -d'_' -f1)
          WAREHOUSE_SPECIFIC=$(echo ${{ matrix.warehouse }} | cut -s -d'_' -f2)
          echo "WAREHOUSE_PLATFORM=${WAREHOUSE_PLATFORM}" >> $GITHUB_ENV
          echo "WAREHOUSE_SPECIFIC=${WAREHOUSE_SPECIFIC}" >> $GITHUB_ENV
          echo "warehouse_platform=${WAREHOUSE_PLATFORM}" >> $GITHUB_OUTPUT
          echo "warehouse_specific=${WAREHOUSE_SPECIFIC}" >> $GITHUB_OUTPUT

      - name: Set DEFAULT_TARGET env
        run: |
          echo "DEFAULT_TARGET=${{ matrix.warehouse }}" >> $GITHUB_ENV

      - name: Python setup
        uses: actions/setup-python@v4
        with:
          python-version: "3.8.x"

      - name: Pip cache
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{ env.WAREHOUSE_PLATFORM }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{ env.WAREHOUSE_PLATFORM }}
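      # Note: restore-keys here repeats the exact primary key, which adds
      # nothing since restore-keys match by prefix; a shorter prefix such as
      # "${{ runner.os }}-pip-" would allow fallback to partial cache hits.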

      - name: Install spark dependencies
        run: |
          pip install --upgrade pip wheel setuptools
          pip install -Iv "dbt-${{ env.WAREHOUSE_PLATFORM }}[PyHive]"==${{ matrix.dbt_version }} --upgrade
          dbt deps
        if: ${{ env.WAREHOUSE_PLATFORM == 'spark' }}

      - name: Install Docker Compose
        run: |
          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
          sudo chmod +x /usr/local/bin/docker-compose
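      # Note: ubuntu-latest runners already ship Docker with the Compose v2
      # plugin (`docker compose`); the standalone 1.29.2 binary is installed
      # here, presumably to match the `docker-compose` v1 CLI invoked below.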

      - name: Build and start Spark cluster
        working-directory: .github/workflows/spark_deployment
        run: |
          docker-compose up -d
          echo "Waiting for Spark services to start..."
          sleep 90
        if: ${{ env.WAREHOUSE_PLATFORM == 'spark' }}
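      # The fixed 90 s sleep is fragile. A readiness poll is one alternative
      # (a sketch, assuming the Spark Thrift Server listens on localhost:10000;
      # the port is an assumption, not taken from this workflow):
      #
      #   timeout 120 bash -c 'until nc -z localhost 10000; do sleep 5; done'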
- name: "Pre-test: Drop ci schemas"
run: |
dbt run-operation post_ci_cleanup --target ${{matrix.warehouse}}

      - name: Run tests
        run: ./.scripts/integration_test.sh -d ${{ matrix.warehouse }}
- name: "Post-test: Drop ci schemas"
run: |
dbt run-operation post_ci_cleanup --target ${{matrix.warehouse}}