Skip to content

Commit

Permalink
fix: update item and collection logic to match against filtered list
Browse files Browse the repository at this point in the history
  • Loading branch information
Jennifer Tran committed May 17, 2024
1 parent 2dc9b47 commit 09004f0
Showing 1 changed file with 50 additions and 27 deletions.
77 changes: 50 additions & 27 deletions transformation-scripts/collection-and-item-workflows-ingest.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,19 @@
"]\n",
"\n",
"collection_json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n",
"filtered_list = [\n",
"filtered_collection_file_paths_list = [\n",
" item\n",
" for item in json_file_paths\n",
" if all(\n",
" excluded_collections not in item\n",
" for excluded_collections in excluded_collections\n",
" )\n",
"]\n",
"print(filtered_collection_file_paths_list)\n",
"\n",
"file_paths_and_collection_ids = [\n",
" {\"filePath\": file_path, \"collectionId\": data[\"id\"]}\n",
" for file_path in filtered_list\n",
" for file_path in filtered_collection_file_paths_list\n",
" if \"id\" in (data := json.load(open(file_path, \"r\")))\n",
"]"
]
Expand Down Expand Up @@ -137,21 +138,13 @@
" return json.load(file)\n",
"\n",
"\n",
"collections_files = \"../ingestion-data/collections/\"\n",
"discovery_items_files = (\n",
" \"../ingestion-data/staging/discovery-items/\"\n",
" if testing_mode\n",
" else \"../ingestion-data/production/discovery-items/\"\n",
"discovery_items_json_file_paths = glob.glob(\n",
" \"../ingestion-data/production/discovery-items//*.json\"\n",
")\n",
"\n",
"discovery_items_json_file_paths = (\n",
" glob.glob(\"../ingestion-data/staging/discovery-items//*.json\")\n",
" if testing_mode\n",
" else glob.glob(\"../ingestion-data/production/discovery-items//*.json\")\n",
")\n",
"# Find matching file names\n",
"matching_file_names = find_matching_file_names(\n",
" collections_json_file_paths, discovery_items_json_file_paths\n",
" filtered_collection_file_paths_list, discovery_items_json_file_paths\n",
")\n",
"\n",
"# for file_pair in matching_file_names:\n",
Expand Down Expand Up @@ -219,6 +212,15 @@
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"failed_discovery_items = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -257,7 +259,7 @@
" )\n",
"\n",
"\n",
"def ingest_discovery_item(discovery_item):\n",
"def ingest_discovery_item(discovery_item, discovery_item_path):\n",
" discovery_url = f\"{WORKFLOWS_API}discovery\"\n",
" print(discovery_url)\n",
" try:\n",
Expand All @@ -269,21 +271,33 @@
" print(\n",
" f\"ERROR: Kicking off discovery for {discovery_item} failed. Request failed with status code: {response.status_code}\"\n",
" )\n",
" failed_discovery_items.append(discovery_item_path)\n",
" except requests.RequestException as e:\n",
" print(\n",
" f\"ERROR: Kicking off discovery for {discovery_item} failed. An error occurred during the request: {e}\"\n",
" )\n",
" failed_discovery_items.append(discovery_item_path)\n",
" except Exception as e:\n",
" print(\n",
" f\"ERROR: An unexpected error occurred while trying to kick off discovery for {discovery_item} failed: {e}\"\n",
" )"
" )\n",
" failed_discovery_items.append(discovery_item_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If testing_mode is enabled, use a test list:"
"If `super_testing_mode` is enabled, run against a test list containing only the first collection and its matching discovery item:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"super_testing_mode = False"
]
},
{
Expand All @@ -293,20 +307,20 @@
"outputs": [],
"source": [
"test_file_paths_and_collection_ids = [file_paths_and_collection_ids[0]]\n",
"test_discovery_item = [f\"../ingestion-data/staging/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n",
"test_discovery_item = [f\"../ingestion-data/production/discovery-items/{file_paths_and_collection_ids[0].get(\"collectionId\")}.json\"]\n",
"\n",
"print(test_discovery_item)\n",
"print(test_file_paths_and_collection_ids)\n",
"print(VEDA_STAC_API)\n",
"\n",
"file_paths_and_collection_ids = (\n",
" test_file_paths_and_collection_ids\n",
" if testing_mode\n",
" if super_testing_mode\n",
" else file_paths_and_collection_ids\n",
")\n",
"discovery_items_to_process = (\n",
" test_discovery_item\n",
" if testing_mode\n",
" if super_testing_mode\n",
" else discovery_items_to_process\n",
")\n",
"\n",
Expand Down Expand Up @@ -357,23 +371,32 @@
"metadata": {},
"outputs": [],
"source": [
"for discovery_item in discovery_items_to_process:\n",
" print(discovery_item)\n",
"for discovery_item_path in discovery_items_to_process:\n",
" try:\n",
" with open(discovery_item, \"r\", encoding=\"utf-8\") as file:\n",
" discovery_item_json = json.load(file)\n",
"\n",
"        # Kick off discovery for each item in the file (a file may hold a single item or a list of items)\n",
" if isinstance(discovery_item_json, list):\n",
" for single_discovery_item in discovery_item_json:\n",
" ingest_discovery_item(single_discovery_item)\n",
" for single_discovery_item_json in discovery_item_json:\n",
" ingest_discovery_item(single_discovery_item_json, discovery_item_path)\n",
" else:\n",
" ingest_discovery_item(discovery_item_json)\n",
" ingest_discovery_item(discovery_item_json, discovery_item_path)\n",
"\n",
" except requests.RequestException as e:\n",
" print(f\"An error occurred for discovery item {discovery_item}: {e}\")\n",
" print(f\"An error occurred for discovery item {discovery_item_path}: {e}\")\n",
" except Exception as e:\n",
" print(f\"An unexpected error occurred for discovery item {discovery_item}: {e}\")"
" print(\n",
" f\"An unexpected error occurred for discovery item {discovery_item_path}: {e}\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(failed_discovery_items)"
]
}
],
Expand Down

0 comments on commit 09004f0

Please sign in to comment.