diff --git a/config/sidebars.ts b/config/sidebars.ts index cb7497550..4210867f3 100644 --- a/config/sidebars.ts +++ b/config/sidebars.ts @@ -26,6 +26,7 @@ const sidebars: SidebarsConfig = { { type: 'ref', id: 'data/rpc/README', label: 'Soroban RPC'}, { type: 'ref', id: 'data/hubble/README', label: 'Hubble'}, { type: 'ref', id: 'data/horizon/README', label: 'Horizon'}, + { type: 'ref', id: 'data/galexie/README', label: 'Galexie'}, ], tools: [ { @@ -74,6 +75,19 @@ const sidebars: SidebarsConfig = { collapsible: false, }, ], + galexie: [ + { + type: 'category', + label: 'Galexie', + items: [ + { + type: "autogenerated", + dirName: "data/galexie", + }, + ], + collapsible: false, + }, + ], soroban_rpc: [ { type: "category", diff --git a/docs/README.mdx b/docs/README.mdx index bf0897cdb..c1de1d70a 100644 --- a/docs/README.mdx +++ b/docs/README.mdx @@ -20,7 +20,7 @@ Information on how to issue assets on the Stellar network and create custom smar ### [Data](/docs/data/README.mdx) -Discover various data availability options: RPC, Hubble, and Horizon. +Discover various data availability options: RPC, Hubble, Horizon, and Galexie. ### [Tools](/docs/tools/README.mdx) diff --git a/docs/data/README.mdx b/docs/data/README.mdx index 0b48afb61..4a983a068 100644 --- a/docs/data/README.mdx +++ b/docs/data/README.mdx @@ -9,19 +9,19 @@ There are several products to choose from when interacting with the Stellar Netw This section will walk you through the differences between the various platforms and tools, what platform or tool is best for what use case, and then link to their various documentation locations. - **[RPC](#rpc)** - live network gateway -- **[Horizon](#horizon)** - API for network state data -- **Galexie** - exports raw ledger metadata files - **[Hubble](#hubble)** - analytics database for network data - -| Features | RPC | Horizon | Galexie | Hubble | -| ----------------------- | --- | ------- | ------- | ------ | -| Real-time Data | ✅ | ✅ | ✅ | ❌ | -| Historical Data | ❌ | ❌\* | ✅ | ✅ | -| Smart Contracts | ✅ | ❌ | ✅ | ✅ | -| API | ✅ | ✅ | ❌ | ❌ | -| Transaction Submission | ✅ | ✅ | ❌ | ❌ | -| Curated and Parsed Data | ❌ | ✅ | ❌ | ✅ | -| Ad Hoc Data Analysis | ❌ | ❌ | ❌ | ✅ | +- **[Horizon](#horizon)** - API for network state data +- **[Galexie](#galexie)** - exports raw ledger metadata files + +| Features | RPC | Hubble | Horizon | Galexie | +| ----------------------- | --- | ------ | ------- | ------- | +| Real-time Data | ✅ | ❌ | ✅ | ✅ | +| Historical Data | ❌ | ✅ | ❌\* | ✅ | +| Smart Contracts | ✅ | ✅ | ❌ | ✅ | +| API | ✅ | ❌ | ✅ | ❌ | +| Transaction Submission | ✅ | ❌ | ✅ | ❌ | +| Curated and Parsed Data | ❌ | ✅ | ✅ | ❌ | +| Ad Hoc Data Analysis | ❌ | ✅ | ❌ | ❌ | \*_Please note that Horizon can provide full historical data but is not the recommended tool for full historical data access._ @@ -39,20 +39,6 @@ If the RPC does not otherwise serve your needs, please tell us why in the [Stell You have the option of [setting up your own RPC instance](./rpc/admin-guide.mdx) or using a publicly available service from [an infrastructure provider](./rpc/rpc-providers.mdx). -## [Data Indexers](../tools/developer-tools/data-indexers.mdx) - -Data indexers are specialized tools that process and index blockchain data, making it more accessible and queryable to end users. They transform raw blockchain data into a more structured format that’s easier for end users to interact with. - -Data indexers have advanced querying capabilities and enhanced analytics. 
They provide features such as statistical analysis of blockchain activity, visualization of transaction flows, or tracking DeFi metrics — capabilities that go beyond basic transaction lookup for current or historical state data. - -Data indexers are a potentially more user-friendly, cost-effective choice for users. Check out several available data indexers for the Stellar network in our [Tools section](../tools/developer-tools/data-indexers.mdx). - -## [Analytics Platforms](../tools/developer-tools/analytics-platforms.mdx) - -Analytics Platforms are specialized tools that process and make historical Stellar network data available. The Stellar network data is loaded into database tables for large data analytics using SQL. Users can create complex ad hoc analysis, dashboarding, and curate actionable data insights (e.g., business intelligence or business analytics). - -Check out several available nalytics platforms for the Stellar network in our [Tools section](../tools/developer-tools/analytics-platforms.mdx). - ## [Hubble](./hubble/README.mdx) Hubble is an SDF-maintained, open-source, publicly available BigQuery data warehouse that provides a complete, holistic historical record of the Stellar network. It is a read-only platform and does not have the capability to send transactions to the network like you can with RPC. @@ -70,3 +56,21 @@ Horizon is an API for accessing and interacting with the Stellar network data. I Horizon stores three types of data (current state, historical state, and derived state) in one database, and the data is available in real-time for transactional use, which makes Horizon more expensive and resource-intensive to operate. If you’re considering using Horizon over the RPC, let us know in the [Stellar Developer Discord](https://discord.gg/stellardev) or file an issue in the [RPC repo](https://github.com/stellar/soroban-rpc) and let us know why! You can [run your own instance of Horizon](./horizon/admin-guide/README.mdx) or use one of the publicly available Horizon services from [these infrastructure providers](./horizon/horizon-providers.mdx). + +## [Galexie](./galexie/README.mdx) + +Galexie is a tool for exporting Stellar ledger metadata to external data storage. Learn more about its [use cases](./galexie/README.mdx) and how to [run](./galexie/admin_guide/README.mdx) your own instance of Galexie. + +## [Data Indexers](../tools/developer-tools/data-indexers.mdx) + +Data indexers are specialized tools that process and index blockchain data, making it more accessible and queryable to end users. They transform raw blockchain data into a more structured format that’s easier for end users to interact with. + +Data indexers have advanced querying capabilities and enhanced analytics. They provide features such as statistical analysis of blockchain activity, visualization of transaction flows, or tracking DeFi metrics — capabilities that go beyond basic transaction lookup for current or historical state data. + +Data indexers are a potentially more user-friendly, cost-effective choice for users. Check out several available data indexers for the Stellar network in our [Tools section](../tools/developer-tools/data-indexers.mdx). + +## [Analytics Platforms](../tools/developer-tools/analytics-platforms.mdx) + +Analytics Platforms are specialized tools that process and make historical Stellar network data available. The Stellar network data is loaded into database tables for large data analytics using SQL. 
Users can perform complex ad hoc analysis, build dashboards, and curate actionable data insights (e.g., business intelligence or business analytics).
+
+Check out several available analytics platforms for the Stellar network in our [Tools section](../tools/developer-tools/analytics-platforms.mdx).
diff --git a/docs/data/galexie/README.mdx b/docs/data/galexie/README.mdx
new file mode 100644
index 000000000..12b6cdb5a
--- /dev/null
+++ b/docs/data/galexie/README.mdx
@@ -0,0 +1,37 @@
+---
+title: Galexie Introduction
+sidebar_position: 0
+---
+
+## What is Galexie?
+
+Galexie is a tool for extracting, processing, and exporting Stellar ledger metadata to external storage, creating a data lake of pre-processed ledger metadata. Galexie is the foundation of the Composable Data Pipeline (CDP) and serves as the first step in extracting raw Stellar ledger metadata and making it accessible. Learn more about CDP’s benefits and applications in this [blog post](https://stellar.org/blog/developers/composable-data-platform).
+
+## What Are the Key Features of Galexie?
+
+Galexie is designed to make the export of ledger metadata streamlined and efficient via a simple, user-friendly interface. Its key features include:
+
+- Exporting Stellar ledger metadata to cloud storage
+- Exporting a specified range of ledgers or continuously streaming new ledgers as they are created on the Stellar network
+- Exporting ledger metadata in XDR, Stellar Core’s native format
+- Compressing data before export to optimize storage efficiency in the data lake
+
+![Galexie architecture](/assets/galexie-architecture.png)
+
+## Why XDR Format?
+
+Exporting data in XDR—the native Stellar Core format—enables Galexie to preserve full transaction metadata, ensuring data integrity while keeping storage efficient. The XDR format maintains compatibility with all Stellar components, providing a solid foundation for applications that require consistent access to historical data. Refer to the [XDR](/docs/learn/encyclopedia/data-format/xdr) documentation for more information on this format.
+
+## Why Run Galexie?
+
+Galexie enables you to make a copy of Stellar ledger metadata over which you have complete control. Galexie can continuously sync your data lake with the latest ledger data, freeing you from tedious data ingestion and allowing you to focus on building customized applications that consume and analyze the exported data.
+
+## What Can You Do with the Data Lake Created by Galexie?
+
+Once the data is stored in the cloud, it becomes easily accessible for integration with modern data processing and analytics tools, enabling various workflows and insights.
+
+The pre-processed ledger data exported by Galexie can be utilized across various applications, such as:
+
+- Analytics Tools: Analyze trends over time.
+- Audit Applications: Retrieve historical transaction data for auditing and compliance.
+- Monitoring Systems: Create tools to track network metrics.
diff --git a/docs/data/galexie/admin_guide/README.mdx b/docs/data/galexie/admin_guide/README.mdx
new file mode 100644
index 000000000..2586a379d
--- /dev/null
+++ b/docs/data/galexie/admin_guide/README.mdx
@@ -0,0 +1,6 @@
+---
+title: Admin Guide
+sidebar_position: 15
+---
+
+This guide provides step-by-step instructions on installing and running Galexie.
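+
+At a glance, the end-to-end workflow looks like the sketch below. It simply previews the commands explained in the Installing, Configuring, and Running pages, assumes GCP credentials and a finished `config.toml` are already in place, and uses an illustrative ledger range:
+
+```shell
+# Pull the Galexie Docker image
+docker pull stellar/stellar-galexie
+
+# Run Galexie in the background, exporting an example range of ledgers
+docker run --platform linux/amd64 -d \
+  -v "$HOME/.config/gcloud/application_default_credentials.json":/.config/gcp/credentials.json:ro \
+  -e GOOGLE_APPLICATION_CREDENTIALS=/.config/gcp/credentials.json \
+  -v ${PWD}/config.toml:/config.toml \
+  stellar/stellar-galexie \
+  append --start 350000 --end 450000 --config-file config.toml
+```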
diff --git a/docs/data/galexie/admin_guide/configuring.mdx b/docs/data/galexie/admin_guide/configuring.mdx
new file mode 100644
index 000000000..a98f03758
--- /dev/null
+++ b/docs/data/galexie/admin_guide/configuring.mdx
@@ -0,0 +1,46 @@
+---
+title: Configuring
+sidebar_position: 20
+---
+
+# Configuring
+
+## Steps to Configure Galexie
+
+1. **Copy the Sample Configuration**
+
+   Start with the provided sample file, [`config.example.toml`](https://github.com/stellar/go/blob/master/services/galexie/config.example.toml).
+
+2. **Rename and Update the Configuration**
+
+   Rename the file to `config.toml` and adjust settings as needed.
+
+   - **Key Settings Include:**
+
+     - **Google Cloud Storage (GCS) Bucket**
+
+       Specify the GCS bucket where Galexie will export Stellar ledger data. Update `destination_bucket_path` to the complete path of your GCS bucket, including subpaths if applicable.
+
+       ```toml
+       destination_bucket_path = "stellar-network-data/testnet"
+       ```
+
+     - **Stellar Network**
+
+       Set the Stellar network to be used in creating the data lake.
+
+       ```toml
+       network = "testnet"
+       ```
+
+     - **Data Organization (Optional)**
+
+       Configure how the exported data is organized in the GCS bucket. The example below stores one ledger per file and groups the files into partitions (directories) of 64,000 files each.
+
+       ```toml
+       # Number of ledgers stored in each file
+       ledgers_per_file = 1
+
+       # Number of files per partition/directory
+       files_per_partition = 64000
+       ```
diff --git a/docs/data/galexie/admin_guide/installing.mdx b/docs/data/galexie/admin_guide/installing.mdx
new file mode 100644
index 000000000..05f31cf3e
--- /dev/null
+++ b/docs/data/galexie/admin_guide/installing.mdx
@@ -0,0 +1,12 @@
+---
+title: Installing
+sidebar_position: 30
+---
+
+# Installing
+
+To install Galexie, retrieve the Docker image from the [Stellar Docker Hub registry](https://hub.docker.com/r/stellar/stellar-galexie) using the following command:
+
+```shell
+docker pull stellar/stellar-galexie
+```
diff --git a/docs/data/galexie/admin_guide/monitoring.mdx b/docs/data/galexie/admin_guide/monitoring.mdx
new file mode 100644
index 000000000..040cc0661
--- /dev/null
+++ b/docs/data/galexie/admin_guide/monitoring.mdx
@@ -0,0 +1,50 @@
+---
+title: Monitoring
+sidebar_position: 50
+---
+
+# Monitoring
+
+### Metrics
+
+Galexie publishes metrics through an HTTP-based admin endpoint, which makes it easier to monitor its performance. This endpoint is configurable in the `config.toml` file, where you can specify the port on which metrics are made available. The data is exposed in Prometheus format, enabling easy integration with existing monitoring and alerting systems.
+
+The admin port can be configured in the `config.toml` file by setting the `admin_port` variable. By default, the `admin_port` is set to `6061`.
+
+```toml
+# Admin port configuration
+# Specifies the port for hosting the HTTP service that publishes metrics.
+admin_port = 6061
+```
+
+With this configuration, the URL to access the metrics endpoint will be:
+
+```
+http://<host>:6061/metrics
+```
+
+Galexie emits several application-specific metrics to help track the export process:
+
+- `galexie_last_exported_ledger`: The sequence number of the most recently exported ledger.
+- `galexie_uploader_put_duration_seconds`: The time taken to upload objects to the data lake.
+- `galexie_uploader_object_size_bytes`: Compressed and uncompressed sizes of the objects being uploaded.
+- `galexie_upload_queue_length`: Number of objects currently queued and waiting to be uploaded.
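+
+For a quick spot check, you can scrape the endpoint and filter for these metrics (a sketch, assuming the default `admin_port` of 6061 and that Galexie is reachable on localhost):
+
+```shell
+# Fetch the Prometheus-format metrics and keep only the Galexie-specific series
+curl -s http://localhost:6061/metrics | grep '^galexie_'
+```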
+
+In addition to these application-specific metrics, Galexie exports system metrics (e.g., CPU, memory, open file descriptors) and Stellar Core ingestion metrics such as `galexie_ingest_ledger_fetch_duration_seconds`.
+
+Use these metrics to build queries that monitor Galexie’s performance and export process. Here are a few examples of useful queries:
+
+- Export Times: Query `galexie_uploader_put_duration_seconds` to monitor average upload times.
+- Queue Length: Use `galexie_upload_queue_length` to view the number of objects waiting to be uploaded.
+- Latest Exported Ledger: Track `galexie_last_exported_ledger` to ensure that ledger exports are up-to-date.
+
+### Logging
+
+Galexie emits logs to stdout and generates a log line for every object being exported to help monitor progress.
+
+Example logs:
+
+```
+INFO[2024-11-07T17:40:37.795-08:00] Uploading: FFFFFF37--200-299/FFFFFF37--200.xdr.zstd pid=98734 service=galexie
+INFO[2024-11-07T17:40:37.892-08:00] Uploaded FFFFFF37--200-299/FFFFFF37--200.xdr.zstd successfully pid=98734 service=galexie
+```
diff --git a/docs/data/galexie/admin_guide/prerequisites.mdx b/docs/data/galexie/admin_guide/prerequisites.mdx
new file mode 100644
index 000000000..c610a1846
--- /dev/null
+++ b/docs/data/galexie/admin_guide/prerequisites.mdx
@@ -0,0 +1,35 @@
+---
+title: Prerequisites
+sidebar_position: 10
+---
+
+# Prerequisites
+
+### 1. Google Cloud Platform (GCP) Account
+
+Galexie exports Stellar ledger metadata to Google Cloud Storage (GCS), so you need a GCP account with:
+
+- Permissions to create a new GCS bucket, or
+- Access to an existing bucket with read/write permissions.
+
+### 2. Docker (Recommended)
+
+> **_NOTE:_** While it is possible to natively install Galexie (without Docker), this requires manual dependency management and is recommended only for advanced users.
+
+Galexie is available as a Docker image, which simplifies installation and setup. Ensure you have Docker Engine installed on your system ([Docker installation guide](https://docs.docker.com/engine/install/)).
+
+## Hardware Requirements
+
+The minimum hardware requirements for running Galexie are:\
+**RAM**: 8 GB\
+**CPU**: 2 vCPUs\
+**Disk**: 100 GB with at least 5K IOPS
+
+### Full History Export
+
+Exporting the full history (as of November 2024):
+
+- Takes an estimated 150 days using a single instance of Galexie
+- Creates a data lake of approximately 3 TB
+- To speed up the export, multiple instances of Galexie can be executed in parallel, each working on a different ledger range
+- For example, creating the full history for the Hubble data lake using about 40 parallel instances (each exporting approximately 1.25 million ledgers) on _e2-standard-2_ (Google Cloud Platform) takes about 5 days
diff --git a/docs/data/galexie/admin_guide/running.mdx b/docs/data/galexie/admin_guide/running.mdx
new file mode 100644
index 000000000..6651889a4
--- /dev/null
+++ b/docs/data/galexie/admin_guide/running.mdx
@@ -0,0 +1,109 @@
+---
+title: Running
+sidebar_position: 40
+---
+
+# Running
+
+With the Docker image available and the configuration file set up, you're now ready to run Galexie and start exporting Stellar ledger data to the GCS bucket.
+
+## Command Line Usage
+
+### Append Command
+
+This is the primary way of running Galexie. The `append` command operates in two distinct modes:
+
+- In continuous/unbounded mode, it starts exporting from the specified start ledger and continuously exports new ledgers that appear on the network until the process is interrupted.
+- In fixed range mode, it exports the specified range of ledgers and exits when done.
+
+Syntax:
+
+```shell
+stellar-galexie append --start <start_ledger> [--end <end_ledger>] [--config-file <config_file_path>]
+```
+
+Arguments:
+
+`--start <start_ledger>` (required)
+
+- The starting ledger sequence number of the range being exported.
+
+`--end <end_ledger>` (optional)
+
+- The ending ledger sequence number of the range being exported. If unspecified or set to 0, the exporter will continuously export new ledgers as they appear on the network.
+
+`--config-file <config_file_path>` (optional)
+
+- The path to the configuration file. If unspecified, the application will look for a file named `config.toml` in the current directory.
+
+Example usage:
+
+```shell
+docker run --platform linux/amd64 -d \
+-v "$HOME/.config/gcloud/application_default_credentials.json":/.config/gcp/credentials.json:ro \
+-e GOOGLE_APPLICATION_CREDENTIALS=/.config/gcp/credentials.json \
+-v ${PWD}/config.toml:/config.toml \
+stellar/stellar-galexie \
+append --start 350000 --end 450000 --config-file config.toml
+```
+
+`--platform linux/amd64`
+
+- Specifies the platform architecture (adjust if needed for your system).
+
+`-v` Mounts volumes to map your local GCP credentials and `config.toml` file to the container:
+
+- `$HOME/.config/gcloud/application_default_credentials.json`: Your local GCP credentials file.
+- `${PWD}/config.toml`: Your local configuration file.
+
+`-e GOOGLE_APPLICATION_CREDENTIALS=/.config/gcp/credentials.json`
+
+- Sets the environment variable for credentials within the container.
+
+`stellar/stellar-galexie`
+
+- The Docker image name.
+
+#### Data Integrity and Resumability
+
+The `append` command maintains strict sequential integrity within each export session. If interrupted and then restarted with the same range, it automatically resumes from where it left off, ensuring no ledgers are missed within a session.
+
+### Scan-and-fill Command
+
+The `scan-and-fill` command is useful when there are gaps in the ledgers already exported to the data lake. It works by scanning all ledgers in the specified range, identifying any missing ledgers, and exporting only those while skipping ledgers that already exist in the data lake.
+
+The `append` command ensures there are no gaps within the range it exports. However, gaps may still appear in the data lake due to certain sequences of events, often involving user intervention, such as:
+
+- Manual deletion of ledgers from the data lake. For example, deleting ledgers 80-90 out of the range 1-100.
+- Running non-contiguous export ranges. For example, exporting ranges 1-50 and 60-100 leaves a gap between 50 and 60. In this case, running the `append` command with the range 1-500 causes Galexie to resume the export from 101, without filling the gap.
+
+Syntax:
+
+```shell
+stellar-galexie scan-and-fill --start <start_ledger> --end <end_ledger> [--config-file <config_file_path>]
+```
+
+Arguments:
+
+`--start <start_ledger>` (required)
+
+- The starting ledger sequence number of the range being exported.
+
+`--end <end_ledger>` (required)
+
+- The ending ledger sequence number of the range being exported.
+
+`--config-file <config_file_path>` (optional)
+
+- The path to the configuration file. If unspecified, the exporter will look for a file named `config.toml` in the current directory.
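+
+As a concrete illustration of the gap scenario above, the two non-contiguous `append` runs and the `scan-and-fill` backfill would look roughly like this (a sketch invoking the binary directly with a local `config.toml`; the Docker form follows under Example usage):
+
+```shell
+# Two non-contiguous append runs leave ledgers 51-59 missing from the data lake
+stellar-galexie append --start 1 --end 50
+stellar-galexie append --start 60 --end 100
+
+# scan-and-fill scans 1-100, detects the gap, and exports only the missing ledgers
+stellar-galexie scan-and-fill --start 1 --end 100
+```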
+ +Example usage: + +```shell +docker run --platform linux/amd64 -d \ +-v "$HOME/.config/gcloud/application_default_credentials.json":/.config/gcp/credentials.json:ro \ +-e GOOGLE_APPLICATION_CREDENTIALS=/.config/gcp/credentials.json \ +-v ${PWD}/config.toml:/config.toml \ +stellar/stellar-galexie \ +scan-and-fill --start 64000 --end 68000 --config-file config.toml +``` diff --git a/docs/data/galexie/admin_guide/setup.mdx b/docs/data/galexie/admin_guide/setup.mdx new file mode 100644 index 000000000..70632debb --- /dev/null +++ b/docs/data/galexie/admin_guide/setup.mdx @@ -0,0 +1,22 @@ +--- +title: Setup +sidebar_position: 10 +--- + +# Setup + +### Google Cloud Platform (GCP) credentials + +Create application default credentials by using your user account for your GCP project by following these steps: + +1. Download the [SDK](https://cloud.google.com/sdk/docs/install). +2. Install and initialize the [gcloud CLI](https://cloud.google.com/sdk/docs/initializing). +3. Create [application default credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc#google-idp) and it should automatically store in this location: `$HOME/.config/gcloud/application_default_credentials.json.` +4. Verify that this file exists before moving on to the next step. + +### Google Cloud Storage (GCS) bucket + +If you already have a GCS bucket with read and write permissions, you can skip this section. If not, follow these steps: + +1. Visit the GCP Console's Storage section (https://console.cloud.google.com/storage) and create a new bucket. +2. Choose a descriptive name for the bucket, such as `stellar-ledger-data`. Refer to [Google Cloud Storage Bucket Naming Guideline](https://cloud.google.com/storage/docs/buckets#naming) for bucket naming conventions. Note down the bucket name, you will need it later during the configuration process. diff --git a/docusaurus.config.ts b/docusaurus.config.ts index ff118e6a1..20467918f 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -164,6 +164,11 @@ const config: Config = { docId: "data/horizon/README", label: "Horizon", }, + { + type: 'doc', + docId: "data/galexie/README", + label: "Galexie", + }, ] }, diff --git a/static/assets/galexie-architecture.png b/static/assets/galexie-architecture.png new file mode 100644 index 000000000..00873711c Binary files /dev/null and b/static/assets/galexie-architecture.png differ