Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: update to use sm2a for staging promotion #297

Merged
merged 17 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 10 additions & 38 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,23 +84,6 @@ jobs:
done
echo "added_files_count: ${ADDED_FILES_COUNT}"

# Uses service client creds to get token
# No username/password needed
- name: Get auth token
id: get-token
run: |
echo "Vars: $vars"
response=$(curl -X POST \
${{ vars.STAGING_COGNITO_DOMAIN }}/oauth2/token \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "grant_type=client_credentials" \
-d "client_id=${{ vars.STAGING_CLIENT_ID }}" \
-d "client_secret=${{ secrets.STAGING_CLIENT_SECRET }}"
)

access_token=$(echo "$response" | jq -r '.access_token')
echo "ACCESS_TOKEN=$access_token" >> $GITHUB_OUTPUT

# Publishes each added dataset config by running scripts/promote_collection.py against staging
# Outputs only files that were successfully published
# Used by other steps
Expand All @@ -110,23 +93,14 @@ jobs:
id: publish-collections
env:
ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
WORKFLOWS_URL: ${{ vars.STAGING_WORKFLOWS_URL }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AUTH_TOKEN: ${{ steps.get-token.outputs.ACCESS_TOKEN }}
COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
STAGING_SM2A_ADMIN_USERNAME: ${{ secrets.STAGING_SM2A_ADMIN_USERNAME }}
STAGING_SM2A_ADMIN_PASSWORD: ${{ secrets.STAGING_SM2A_ADMIN_PASSWORD }}
STAGING_SM2A_API_URL: ${{ vars.STAGING_SM2A_API_URL }}
DATASET_DAG_NAME: ${{ vars.DATASET_DAG_NAME }}
run: |
if [ -z "$WORKFLOWS_URL" ]; then
echo "WORKFLOWS_URL is not set"
exit 1
fi

if [ -z "$AUTH_TOKEN" ]; then
echo "AUTH_TOKEN is not set"
exit 1
fi

publish_url="${WORKFLOWS_URL%/}/dataset/publish"
bearer_token=$AUTH_TOKEN
pip install -r ./scripts/requirements.txt

# Track successful publications
all_failed=true
Expand All @@ -141,16 +115,14 @@ jobs:
collection_id=$(jq -r '.collection' "$file")

echo "Publishing $collection_id"
response=$(curl -s -w "%{http_code}" -o response.txt -X POST "$publish_url" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $AUTH_TOKEN" \
-d "$dataset_config"
)

status_code=$(tail -n1 <<< "$response")
response=$(python3 ./scripts/promote_collection.py "$file" "staging")
echo "Processed file: $file"
status_code=$(echo "$response" | jq -r '.statusCode')
echo "Status Code: $status_code"

# Update status message based on response code
if [ "$status_code" -eq 200 ] || [ "$status_code" -eq 201 ]; then
if [ $status_code -eq 200 ] || [ $status_code -eq 201 ]; then
echo "$collection_id successfully published ✅"
status_message+="- **$collection_id**: Successfully published ✅
"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/promote.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,6 @@ jobs:
run: |
pip install -r ./scripts/requirements.txt
for file in downloaded-files/*.json; do
python3 ./scripts/promote_to_production.py "$file"
python3 ./scripts/promote_collection.py "$file" "production"
echo "Processed file: $file"
done
90 changes: 90 additions & 0 deletions ingestion-data/staging/dataset-config/test-sm2a-staging.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
{
"license": "CC0-1.0",
"stac_version": "1.0.0",
"links": [],
"spatial_extent": {
"xmin": -90.85,
"ymin": 29.03,
"xmax": -90.06,
"ymax": 29.65
},
"temporal_extent": {
"startdate": "2021-08-23T00:00:00.000Z",
"enddate": "2021-09-09T23:59:59.000Z"
},
"discovery_items": [
{
"upload": false,
"cogify": false,
"dry_run": false,
"filename_regex": "(.*)NDWI_Difference_(.*).tif$",
"use_multithreading": false,
"discovery": "s3",
"prefix": "planet-indices-v2/",
"bucket": "veda-data-store-staging"
}
],
"sample_files": [
"s3://veda-data-store-staging/planet-indices-v2/NDWI_Difference_2021-08-23_2021-09-09.tif"
],
"data_type": "cog",
"stac_extensions": [
"https://stac-extensions.github.io/render/v1.0.0/schema.json",
"https://stac-extensions.github.io/item-assets/v1.0.0/schema.json"
],
"item_assets": {
"cog_default": {
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"roles": [
"data",
"layer"
],
"title": "Default COG Layer",
"description": "Cloud optimized default layer to display on map"
}
},
"providers": [
{
"name": "NASA VEDA",
"roles": [
"host"
],
"url": "https://www.earthdata.nasa.gov/dashboard/"
}
],
"assets": {
"thumbnail": {
"title": "Thumbnail",
"type": "image/jpeg",
"roles": [
"thumbnail"
],
"href": "https://thumbnails.openveda.cloud/louisiana-marsh.jpg",
"description": "Photo by [Bridget Besaw](https://www.nature.org/en-us/get-involved/how-to-help/places-we-protect/the-nature-conservancy-in-louisiana-gulf-coast-prairies-and-marshes/) (Wetland landscape across southern Louisiana.)"
}
},
"collection": "ida-ndwi-difference-TEST-SM2A-STAGING",
"title": "NDWI Difference for Pre and Post-Hurricane Ida from PlanetScope TEST-SM2A-STAGING",
"dashboard:time_density": "day",
"description": "Normalized Difference Water Index Difference of before and after Hurricane Ida in Southern Louisiana.",
"renders": {
"dashboard": {
"resampling": "nearest",
"bidx": [
1
],
"colormap_name": "rdbu",
"assets": [
"cog_default"
],
"rescale": [
[
-1,
1
]
],
"title": "VEDA Dashboard Render Parameters"
}
},
"dashboard:is_periodic": true
}
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,53 @@ def validate_discovery_item_config(item: Dict[str, Any]) -> Dict[str, Any]:
return item


def publish_to_staging(payload):
    """Trigger a run of the staging SM2A dataset pipeline DAG.

    Configuration is read from environment variables:

    * ``STAGING_SM2A_API_URL`` -- passed as-is to ``HTTPSConnection``
    * ``STAGING_SM2A_ADMIN_USERNAME`` / ``STAGING_SM2A_ADMIN_PASSWORD`` --
      sent as HTTP Basic credentials
    * ``DATASET_DAG_NAME`` -- DAG to trigger, defaults to
      ``veda_dataset_pipeline``

    Args:
        payload: Mapping whose entries are merged into the DAG-run request
            body alongside a generated ``dag_run_id`` and a ``note``.

    Returns:
        A dict with ``statusCode`` (the HTTP status of the response) and
        ``body`` (the decoded response body).

    Raises:
        ValueError: If the API URL, username, or password is missing or
            empty in the environment.
    """
    base_api_url = os.getenv("STAGING_SM2A_API_URL")
    dataset_pipeline_dag = os.getenv("DATASET_DAG_NAME", "veda_dataset_pipeline")
    username = os.getenv("STAGING_SM2A_ADMIN_USERNAME")
    password = os.getenv("STAGING_SM2A_ADMIN_PASSWORD")

    # Validate the raw values *before* building the token: encoding
    # "None:None" yields a non-empty string, so checking the token itself
    # can never detect missing credentials.
    if not base_api_url or not username or not password:
        raise ValueError(
            "STAGING_SM2A_API_URL or STAGING_SM2A_ADMIN_USERNAME"
            + " or STAGING_SM2A_ADMIN_PASSWORD is not"
            + " set in the environment variables."
        )

    api_token = b64encode(f"{username}:{password}".encode()).decode()

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Basic " + api_token,
    }

    body = {
        **payload,
        # A random suffix gives every triggered run its own id.
        "dag_run_id": f"{dataset_pipeline_dag}-{uuid.uuid4()}",
        "note": "Run from GitHub Actions veda-data",
    }

    http_conn = http.client.HTTPSConnection(base_api_url)
    try:
        http_conn.request(
            "POST",
            f"/api/v1/dags/{dataset_pipeline_dag}/dagRuns",
            json.dumps(body),
            headers,
        )
        response = http_conn.getresponse()
        response_data = response.read()
    finally:
        # Release the socket even when the request or read fails.
        http_conn.close()

    # The status is emitted as JSON on stdout so a calling shell step can
    # extract ``statusCode`` from the script output.
    print(json.dumps({"statusCode": response.status}))
    return {"statusCode": response.status, "body": response_data.decode()}


def promote_to_production(payload):
base_api_url = os.getenv("SM2A_API_URL")
promotion_dag = os.getenv("PROMOTION_DAG_NAME", "veda_promotion_pipeline")
username = os.getenv("SM2A_ADMIN_USERNAME")
password = os.getenv("SM2A_ADMIN_PASSWORD")

api_token = b64encode(f"{username}:{password}".encode()).decode()
print(password)
print(api_token)

if not base_api_url or not api_token:
raise ValueError(
Expand All @@ -53,18 +91,18 @@ def promote_to_production(payload):
"Authorization": "Basic " + api_token,
}

payload["transfer"] = True
body = {
**payload,
"dag_run_id": f"{promotion_dag}-{uuid.uuid4()}",
"note": "Run from GitHub Actions veda-data",
}
http_conn = http.client.HTTPSConnection(base_api_url)
response = http_conn.request(
http_conn.request(
"POST", f"/api/v1/dags/{promotion_dag}/dagRuns", json.dumps(body), headers
)
response = http_conn.getresponse()
response_data = response.read()
print(f"Response: ${response_data}")
http_conn.close()

return {"statusCode": response.status, "body": response_data.decode()}
Expand All @@ -74,14 +112,20 @@ def promote_to_production(payload):
try:
with open(sys.argv[1], "r") as file:
input = json.load(file)
input["transfer"] = True
stage = sys.argv[2]
discovery_items = input.get("discovery_items")
validated_discovery_items = [
validate_discovery_item_config(item) for item in discovery_items
]

dag_payload = {"conf": input}
promote_to_production(dag_payload)

if stage == "production":
promote_to_production(dag_payload)
elif stage == "staging":
publish_to_staging(dag_payload)

except IndexError:
print("Usage: promote_collection.py <file_name> <stage>")
except FileNotFoundError:
print(f"Error: File '{sys.argv[1]}' not found.")
except json.JSONDecodeError:
raise ValueError(f"Invalid JSON content in file {sys.argv[1]}")