From 2e1c7bd5f3278481db5d2f359f148b98086a232a Mon Sep 17 00:00:00 2001 From: Jennifer Tran <12633533+botanical@users.noreply.github.com> Date: Wed, 27 Mar 2024 09:05:30 -0700 Subject: [PATCH] feat: create notebook that publishes collection and item ingest for most collections (#111) * fix: update old links in metadata reconciliation notebook * fix: metadata change for cell description * fix: update cell descriptions * fix: update based on feedback to use domain names, remove extraneous code, clean up --------- Co-authored-by: Jennifer Tran --- .../collection-and-item-ingest.ipynb | 278 ++++++++++++++++++ .../collection-metadata-reconciliation.ipynb | 4 +- 2 files changed, 280 insertions(+), 2 deletions(-) create mode 100644 transformation-scripts/collection-and-item-ingest.ipynb diff --git a/transformation-scripts/collection-and-item-ingest.ipynb b/transformation-scripts/collection-and-item-ingest.ipynb new file mode 100644 index 00000000..97dc6ca8 --- /dev/null +++ b/transformation-scripts/collection-and-item-ingest.ipynb @@ -0,0 +1,278 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook to Publish Items and Collections" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook publishes the collections in `/ingestion-data/collections` excluding:\n", + "- 'hls-l30-002-ej-reprocessed'\n", + "- 'hls-s30-002-ej-reprocessed'\n", + "- 'ls8-covid-19-example-data'\n", + "- 'landsat-c2l2-sr-antarctic-glaciers-pine-island'\n", + "- 'landsat-c2l2-sr-lakes-aral-sea'\n", + "- 'landsat-c2l2-sr-lakes-tonle-sap'\n", + "- 'landsat-c2l2-sr-lakes-lake-balaton'\n", + "- 'landsat-c2l2-sr-lakes-vanern'\n", + "- 'landsat-c2l2-sr-antarctic-glaciers-thwaites'\n", + "- 'landsat-c2l2-sr-lakes-lake-biwa'\n", + "- 'combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import json\n", + "import requests\n", + "from cognito_client import CognitoClient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell retrieves collection JSON files from `/ingestion-data/collections/` and save collectionIds to a list." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "excluded_collections = [\n", + " \"hls-l30-002-ej-reprocessed\",\n", + " \"hls-s30-002-ej-reprocessed\",\n", + " \"ls8-covid-19-example-data\",\n", + " \"landsat-c2l2-sr-antarctic-glaciers-pine-island\",\n", + " \"landsat-c2l2-sr-lakes-aral-sea\",\n", + " \"landsat-c2l2-sr-lakes-tonle-sap\",\n", + " \"landsat-c2l2-sr-lakes-lake-balaton\",\n", + " \"landsat-c2l2-sr-lakes-vanern\",\n", + " \"landsat-c2l2-sr-antarctic-glaciers-thwaites\",\n", + " \"landsat-c2l2-sr-lakes-lake-biwa\",\n", + " \"combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO\",\n", + "]\n", + "\n", + "json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n", + "filtered_list = [\n", + " item\n", + " for item in json_file_paths\n", + " if all(\n", + " excluded_collections not in item\n", + " for excluded_collections in excluded_collections\n", + " )\n", + "]\n", + "\n", + "file_paths_and_collection_ids = [\n", + " {\"filePath\": file_path, \"collectionId\": data[\"id\"]}\n", + " for file_path in filtered_list\n", + " if \"id\" in (data := json.load(open(file_path, \"r\")))\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the testing mode to `True` when testing and `False` otherwise. When the testing mode is `True`, the notebook will be set to run against `dev` endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "testing_mode = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Have your Cognito `username` and `password` ready to set up Cognito Client to retrieve a token that will be used to access the STAC Ingestor API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_endpoint = \"https://test.openveda.cloud\"\n", + "test_client_id = \"CHANGE ME\"\n", + "test_user_pool_id = \"CHANGE ME\"\n", + "test_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "mcp_prod_endpoint = \"https://openveda.cloud\"\n", + "mcp_prod_client_id = \"CHANGE ME\"\n", + "mcp_prod_user_pool_id = \"CHANGE ME\"\n", + "mcp_prod_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "if testing_mode:\n", + " STAC_INGESTOR_API = f\"{test_endpoint}/api/ingest/\"\n", + " VEDA_STAC_API = f\"{test_endpoint}/api/stac/\"\n", + "else:\n", + " STAC_INGESTOR_API = f\"{mcp_prod_endpoint}/api/ingest/\"\n", + " VEDA_STAC_API = f\"{mcp_prod_endpoint}/api/stac/\"\n", + "\n", + "client = CognitoClient(\n", + " client_id=test_client_id if testing_mode else mcp_prod_client_id,\n", + " user_pool_id=test_user_pool_id if testing_mode else mcp_prod_user_pool_id,\n", + " identity_pool_id=test_identity_pool_id\n", + " if testing_mode\n", + " else mcp_prod_identity_pool_id,\n", + ")\n", + "_ = client.login()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell sets up headers for requests." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TOKEN = client.access_token\n", + "\n", + "authorization_header = f\"Bearer {TOKEN}\"\n", + "headers = {\n", + " \"Authorization\": authorization_header,\n", + " \"content-type\": \"application/json\",\n", + " \"accept\": \"application/json\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell defines the function that will post the collection." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def post_collection(collection, collection_id):\n", + " collection_url = f\"{VEDA_STAC_API}collections/{collection_id}\"\n", + " ingest_url = f\"{STAC_INGESTOR_API}collections\"\n", + "\n", + " try:\n", + " response = requests.post(ingest_url, json=collection, headers=headers)\n", + " response.raise_for_status()\n", + " if response.status_code == 201:\n", + " print(\n", + " f\"Request was successful. Find the updated collection at {collection_url}\"\n", + " )\n", + " else:\n", + " print(\n", + " f\"Updating {collection_id} failed. Request failed with status code: {response.status_code}\"\n", + " )\n", + " except requests.RequestException as e:\n", + " print(\n", + " f\"Updating {collection_id} failed. An error occurred during the request: {e}\"\n", + " )\n", + " except Exception as e:\n", + " print(\n", + " f\"An unexpected error occurred while trying to update {collection_id}: {e}\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If testing_mode is enabled, use a test list:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n", + "print(test_file_paths_and_collection_ids)\n", + "print(VEDA_STAC_API)\n", + "\n", + "\n", + "file_paths_and_collection_ids = (\n", + " test_file_paths_and_collection_ids\n", + " if testing_mode\n", + " else file_paths_and_collection_ids\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell publishes the collection to the target ingestion `api/collections` endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for item in file_paths_and_collection_ids:\n", + " collection_id = item[\"collectionId\"]\n", + " file_path = item[\"filePath\"]\n", + "\n", + " try:\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", + " collection = json.load(file)\n", + "\n", + " # Publish the updated collection to the target ingestion `api/collections` endpoint\n", + " post_collection(collection, collection_id)\n", + "\n", + " except requests.RequestException as e:\n", + " print(f\"An error occurred for collectionId {collection_id}: {e}\")\n", + " except Exception as e:\n", + " print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/transformation-scripts/collection-metadata-reconciliation.ipynb b/transformation-scripts/collection-metadata-reconciliation.ipynb index eafe9b4c..54c5d3d1 100644 --- a/transformation-scripts/collection-metadata-reconciliation.ipynb +++ b/transformation-scripts/collection-metadata-reconciliation.ipynb @@ -77,7 +77,7 @@ "metadata": {}, "outputs": [], "source": [ - "dev_endpoint = \"https://dev.delta-backend.com/\"\n", + "dev_endpoint = \"https://dev.openveda.cloud\"\n", "dev_client_id = \"CHANGE ME\"\n", "dev_user_pool_id = \"CHANGE ME\"\n", "dev_identity_pool_id = \"CHANGE ME\"\n", @@ -88,7 +88,7 @@ "staging_identity_pool_id = \"CHANGE ME\"\n", "\n", "ingestor_staging_url = \"https://ig9v64uky8.execute-api.us-west-2.amazonaws.com/staging/\"\n", - "ingestor_dev_url = \"https://dev.delta-backend.com/\"\n", + "ingestor_dev_url = \"https://dev.openveda.cloud\"\n", "\n", "if testing_mode:\n", " STAC_INGESTOR_API = ingestor_dev_url\n",