From ed4a63411425270b9f566689795bd0767a54f1a9 Mon Sep 17 00:00:00 2001 From: Simon Chow Date: Wed, 31 Jul 2024 22:47:57 -0400 Subject: [PATCH 1/3] Add galexie backfill scripts --- services/galexie/scripts/README.md | 15 +++++ services/galexie/scripts/batch_config.yml | 52 +++++++++++++++++ .../scripts/generate_compute_instances.py | 58 +++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 services/galexie/scripts/README.md create mode 100644 services/galexie/scripts/batch_config.yml create mode 100755 services/galexie/scripts/generate_compute_instances.py diff --git a/services/galexie/scripts/README.md b/services/galexie/scripts/README.md new file mode 100644 index 0000000000..6a01b34c42 --- /dev/null +++ b/services/galexie/scripts/README.md @@ -0,0 +1,15 @@ +## Galexie: Backfill Examples + +The files in this directory are examples of different ways to use Galexie to backfill Stellar network data into a Google Cloud Storage (GCS) bucket. + +## Notes and Tips + +* An unoptimized full history backfill with pubnet data using Galexie took roughly 4.5 days +* Total costs ~= $1100 USD + * Compute Costs ~= $500 USD + * GCS Class A Operations (writes) Costs ~= $600 USD +* Pubnet full history size is ~= 3 TB (as of 2024-07-31) +* Earlier ledgers are processed faster by Galexie than ledgers closer to the current time. This is due to the fact that ledgers closer to the current time have more data due to additional features added over the years as well as larger adoption and usage of the Stellar network in general. +* There is a noticeable inflection point in runtime around ledger 30000000 (30 million). From that point on, it is recommended to use smaller ledger ranges for the backfilling process. +* There are extra flags that can be enabled in the captive-core.cfg to output extra information such as `ENABLE_SOROBAN_DIAGNOSTIC_EVENTS`. 
Please see more captive-core options [here](https://github.com/stellar/go/blob/f692f1246b01fb09af2c232630d4ad31025de747/ingest/ledgerbackend/toml.go#L74-L109) +* Large ledger ranges (e.g., 100000 VS 2500000 ledger range) may slow down processing speed (this assumption has not been confirmed and may not affect your use case) diff --git a/services/galexie/scripts/batch_config.yml b/services/galexie/scripts/batch_config.yml new file mode 100644 index 0000000000..d949644cc8 --- /dev/null +++ b/services/galexie/scripts/batch_config.yml @@ -0,0 +1,52 @@ +# This yaml file serves as an example job configuration file for GCP batch. +# https://cloud.google.com/batch + +job: + taskGroups: + - taskSpec: + computeResource: + cpuMilli: 3000 + memoryMib: 2000 + maxRetryCount: 1 + container: + imageUri: "stellar/stellar-galexie:1.0.0" + entrypoint: "galexie" + commands: ["append", "--config-file", "/mnt/galexie-config-pubnet-batch/config-pubnet.toml", "--start", "${START}", "--end", "${END}"] + tasks: + # It is possible to use the GCP batch index instead of manually naming each task + - name: "galexie-1" + environments: + START: "2" + END: "2499999" + - name: "galexie-2" + environments: + START: "2500000" + END: "4999999" + + ... + + - name: "galexie-3" + environments: + START: "30000000" + END: "31249999" + - name: "galexie-4" + environments: + START: "31250000" + END: "32499999" + + ... 
#!/usr/bin/python3
"""
Example generator for `gcloud compute instances create-with-container` commands.

Each generated command creates one GCP compute instance that runs galexie in
`append` mode over one contiguous ledger range, so a backfill can be split
across several instances.

This script may need slight modifications depending on the GCP project
you plan to create compute instances in. Running it only prints the commands
(separated by ";" and a blank line); nothing is executed against GCP.
"""

# Command template. The flags are joined with single spaces so that every
# generated command is a single shell line. The placeholders in braces are
# filled in by build_commands().
command = " ".join((
    "gcloud compute instances create-with-container {instance_name}",
    "--project={gcp_project}",
    "--zone={zone}",
    "--machine-type=e2-standard-2",
    "--network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default",
    "--maintenance-policy=MIGRATE",
    "--provisioning-model=STANDARD",
    "--service-account={service_account}",
    "--scopes=https://www.googleapis.com/auth/cloud-platform",
    "--image=projects/cos-cloud/global/images/cos-stable-113-18244-85-29",
    "--boot-disk-size=10GB",
    "--boot-disk-type=pd-balanced",
    "--boot-disk-device-name=galexie-pubnet-custom-config",
    "--container-image=stellar/stellar-galexie:1.0.0",
    "--container-restart-policy=always",
    "--container-privileged",
    "--container-command=galexie",
    "--container-arg=append",
    "--container-arg=--config-file",
    "--container-arg=/mnt/galexie-config-pubnet/config-pubnet.toml",
    "--container-arg=--start",
    "--container-arg={start}",
    "--container-arg=--end",
    "--container-arg={end}",
    # One pre-created config disk per batch is mounted into the container.
    "--container-mount-disk=mode=rw,mount-path=/mnt/galexie-config-pubnet,name=galexie-config-pubnet-batch-{batch_num},partition=0",
    "--disk=boot=no,device-name=galexie-config-pubnet-batch-{batch_num},mode=rw,name=galexie-config-pubnet-batch-{batch_num},scope=regional",
    "--no-shielded-secure-boot",
    "--shielded-vtpm",
    "--shielded-integrity-monitoring",
    "--labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-113-18244-85-29",
))

# Fill these in for your environment before running the script.
gcp_project = ""
zone = ""
service_account = ""

# Inclusive ledger range to backfill and the number of ledgers per instance.
batch_size = 2500000
start = 0
last_ledger = 52124262


def build_commands(template, gcp_project, zone, service_account,
                   start, last_ledger, batch_size):
    """Return one formatted gcloud command per ledger batch.

    The inclusive range [start, last_ledger] is split into consecutive
    batches of ``batch_size`` ledgers. Batches are numbered from 1, and that
    number is used for both the instance name and the config disk name.

    Args:
        template: Command template containing the ``{instance_name}``,
            ``{gcp_project}``, ``{zone}``, ``{service_account}``,
            ``{start}``, ``{end}`` and ``{batch_num}`` placeholders.
        gcp_project: Value substituted for ``{gcp_project}``.
        zone: Value substituted for ``{zone}``.
        service_account: Value substituted for ``{service_account}``.
        start: First ledger of the first batch.
        last_ledger: Last ledger of the final batch (inclusive).
        batch_size: Number of ledgers handled by each instance.

    Returns:
        A list of command strings, one per batch, in ledger order.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive number of ledgers")

    batch_starts = range(start, last_ledger + 1, batch_size)
    return [
        template.format(
            instance_name=f"galexie-pubnet-custom-config-{batch_num}",
            # Every placeholder in the template must be supplied here;
            # str.format raises KeyError for any that is missing.
            gcp_project=gcp_project,
            zone=zone,
            service_account=service_account,
            start=batch_start,
            # Clamp the final batch so it never runs past last_ledger.
            end=min(batch_start + batch_size - 1, last_ledger),
            batch_num=batch_num,
        )
        for batch_num, batch_start in enumerate(batch_starts, start=1)
    ]


commands = build_commands(
    command,
    gcp_project,
    zone,
    service_account,
    start,
    last_ledger,
    batch_size,
)

if __name__ == "__main__":
    print(";\n\n".join(commands))
Please see more captive-core options [here](https://github.com/stellar/go/blob/f692f1246b01fb09af2c232630d4ad31025de747/ingest/ledgerbackend/toml.go#L74-L109) * Large ledger ranges (e.g., 100000 VS 2500000 ledger range) may slow down processing speed (this assumption has not been confirmed and may not affect your use case) + +## Instructions for generate_compute_instances.py + +* This Python script will generate `gcloud compute instances` commands that you can run in your terminal/shell to create compute instances that run Galexie over a specified ledger range +* To use this script please fill out the configuration variables defined below the command template (`gcp_project`, `zone`, `service_account`, `batch_size`, `start`, and `last_ledger`). This will include information such as your GCP project, zone, and service account you wish to use to execute Galexie with. +* You will need to create the volume/disk mounts that contain the Galexie and captive core configuration files; the generated commands attach them with the following flag: + +``` +--container-mount-disk=mode=rw,mount-path=/mnt/galexie-config-pubnet,name=galexie-config-pubnet-batch-{batch_num},partition=0 \ +``` + +* Note that these compute instances will not spin down on their own. The Galexie container will complete its ledger range and will then be stuck in an infinite retry loop (note the `--container-restart-policy=always` flag). 
Please manually stop the compute instance when all ledgers for the ledger range have been written. + +## Instructions for batch_config.yml + +* This YAML file is a job configuration that creates compute instances to run Galexie using [GCP batch](https://cloud.google.com/batch) +* This file will not run as is: you will need to modify the tasks as well as the mounted disks containing the Galexie and captive core configuration files +* This file can be used like so: +``` +gcloud batch jobs submit galexie-batch --config batch_config.yml +``` From 1bb3f5229cd83c6b465762e48246bd6b2789109f Mon Sep 17 00:00:00 2001 From: Simon Chow Date: Thu, 1 Aug 2024 12:00:35 -0400 Subject: [PATCH 3/3] Missing newline --- services/galexie/scripts/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/services/galexie/scripts/README.md b/services/galexie/scripts/README.md index 74f29c7c67..d182b3d356 100644 --- a/services/galexie/scripts/README.md +++ b/services/galexie/scripts/README.md @@ -31,6 +31,7 @@ The files in this directory are examples in different ways to use Galexie to bac * This YAML file is a job configuration that creates compute instances to run Galexie using [GCP batch](https://cloud.google.com/batch) * This file will not run as is: you will need to modify the tasks as well as the mounted disks containing the Galexie and captive core configuration files * This file can be used like so: + ``` gcloud batch jobs submit galexie-batch --config batch_config.yml ```