diff --git a/.github/workflows/galexie-release.yml b/.github/workflows/galexie-release.yml index 18c441eae3..b84f3c1cf0 100644 --- a/.github/workflows/galexie-release.yml +++ b/.github/workflows/galexie-release.yml @@ -16,7 +16,7 @@ jobs: # this is the multi-arch index sha, get it by 'docker buildx imagetools inspect stellar/quickstart:testing' GALEXIE_INTEGRATION_TESTS_QUICKSTART_IMAGE: docker.io/stellar/quickstart:testing@sha256:03c6679f838a92b1eda4cd3a9e2bdee4c3586e278a138a0acf36a9bc99a0041f GALEXIE_INTEGRATION_TESTS_QUICKSTART_IMAGE_PULL: "false" - STELLAR_CORE_VERSION: 21.1.0-1921.b3aeb14cc.focal + STELLAR_CORE_VERSION: 21.3.1-2007.4ede19620.focal steps: - name: Set VERSION run: | diff --git a/.github/workflows/galexie.yml b/.github/workflows/galexie.yml index 458f23ca37..6406a7a897 100644 --- a/.github/workflows/galexie.yml +++ b/.github/workflows/galexie.yml @@ -10,7 +10,7 @@ jobs: name: Test runs-on: ubuntu-latest env: - CAPTIVE_CORE_DEBIAN_PKG_VERSION: 21.1.0-1921.b3aeb14cc.focal + CAPTIVE_CORE_DEBIAN_PKG_VERSION: 21.3.1-2007.4ede19620.focal GALEXIE_INTEGRATION_TESTS_ENABLED: "true" GALEXIE_INTEGRATION_TESTS_CAPTIVE_CORE_BIN: /usr/bin/stellar-core # this pins to a version of quickstart:testing that has the same version as GALEXIE_INTEGRATION_TESTS_CAPTIVE_CORE_BIN diff --git a/.github/workflows/horizon.yml b/.github/workflows/horizon.yml index 117e11bb6f..86e68c3d85 100644 --- a/.github/workflows/horizon.yml +++ b/.github/workflows/horizon.yml @@ -33,9 +33,9 @@ jobs: HORIZON_INTEGRATION_TESTS_ENABLED: true HORIZON_INTEGRATION_TESTS_CORE_MAX_SUPPORTED_PROTOCOL: ${{ matrix.protocol-version }} HORIZON_INTEGRATION_TESTS_CAPTIVE_CORE_USE_DB: true - PROTOCOL_21_CORE_DEBIAN_PKG_VERSION: 21.0.0-1872.c6f474133.focal - PROTOCOL_21_CORE_DOCKER_IMG: stellar/stellar-core:21 - PROTOCOL_21_SOROBAN_RPC_DOCKER_IMG: stellar/soroban-rpc:21.0.0-rc2-73 + PROTOCOL_21_CORE_DEBIAN_PKG_VERSION: 21.3.1-2007.4ede19620.focal + PROTOCOL_21_CORE_DOCKER_IMG: stellar/stellar-core:21.3.1-2007.4ede19620.focal + PROTOCOL_21_SOROBAN_RPC_DOCKER_IMG: stellar/soroban-rpc:21.5.1 PGHOST: localhost PGPORT: 5432 PGUSER: postgres @@ -98,7 +98,7 @@ jobs: - name: Calculate the source hash id: calculate_source_hash run: | - combined_hash=$(echo "horizon-hash-${{ hashFiles('./horizon') }}-${{ hashFiles('./clients/horizonclient/**') }}-${{ hashFiles('./protocols/horizon/**') }}-${{ hashFiles('./txnbuild/**') }}-${{ hashFiles('./ingest/**') }}-${{ hashFiles('./xdr/**') }}-${{ hashFiles('./services/**') }}-${{ env.PROTOCOL_21_CORE_DOCKER_IMG }}-${{ env.PREFIX }}" | sha256sum | cut -d ' ' -f 1) + combined_hash=$(echo "horizon-hash-${{ hashFiles('./horizon') }}-${{ hashFiles('./clients/horizonclient/**') }}-${{ hashFiles('./protocols/horizon/**') }}-${{ hashFiles('./txnbuild/**') }}-${{ hashFiles('./ingest/**') }}-${{ hashFiles('./xdr/**') }}-${{ hashFiles('./services/**') }}-${{ env.PROTOCOL_21_CORE_DOCKER_IMG }}-${{ env.PROTOCOL_21_RPC_DOCKER_IMG }}-${{ env.PROTOCOL_21_CORE_DEBIAN_PKG_VERSION }}-${{ env.PREFIX }}" | sha256sum | cut -d ' ' -f 1) echo "COMBINED_SOURCE_HASH=$combined_hash" >> "$GITHUB_ENV" - name: Restore Horizon binary and integration tests source hash to cache @@ -123,7 +123,7 @@ jobs: name: Test (and push) verify-range image runs-on: ubuntu-22.04 env: - STELLAR_CORE_VERSION: 21.0.0-1872.c6f474133.focal + STELLAR_CORE_VERSION: 21.3.1-2007.4ede19620.focal CAPTIVE_CORE_STORAGE_PATH: /tmp steps: - uses: actions/checkout@v3 diff --git a/ingest/README.md b/ingest/README.md index 277807f978..bace92c2d0 100644 --- 
a/ingest/README.md +++ b/ingest/README.md @@ -23,9 +23,9 @@ From a high level, the ingestion library is broken down into a few modular compo [ Ledger Backend ] | - | - Captive - Core + |---+---| + Captive Buffered + Core Storage ``` This is described in a little more detail in [`doc.go`](./doc.go), its accompanying examples, the documentation within this package, and the rest of this tutorial. diff --git a/ingest/ledgerbackend/buffered_storage_backend_test.go b/ingest/ledgerbackend/buffered_storage_backend_test.go index ca2711c40d..6c95b465f7 100644 --- a/ingest/ledgerbackend/buffered_storage_backend_test.go +++ b/ingest/ledgerbackend/buffered_storage_backend_test.go @@ -505,7 +505,9 @@ func TestLedgerBufferBoundedObjectNotFound(t *testing.T) { bsb.ledgerBuffer.wg.Wait() _, err := bsb.GetLedger(ctx, 3) - assert.EqualError(t, err, "failed getting next ledger batch from queue: ledger object containing sequence 3 is missing: file does not exist") + assert.ErrorContains(t, err, "ledger object containing sequence 3 is missing") + assert.ErrorContains(t, err, objectName) + assert.ErrorContains(t, err, "file does not exist") } func TestLedgerBufferUnboundedObjectNotFound(t *testing.T) { @@ -571,5 +573,8 @@ func TestLedgerBufferRetryLimit(t *testing.T) { bsb.ledgerBuffer.wg.Wait() _, err := bsb.GetLedger(context.Background(), 3) - assert.EqualError(t, err, "failed getting next ledger batch from queue: maximum retries exceeded for downloading object containing sequence 3: transient error") + assert.ErrorContains(t, err, "failed getting next ledger batch from queue") + assert.ErrorContains(t, err, "maximum retries exceeded for downloading object containing sequence 3") + assert.ErrorContains(t, err, objectName) + assert.ErrorContains(t, err, "transient error") } diff --git a/ingest/ledgerbackend/captive_core_backend.go b/ingest/ledgerbackend/captive_core_backend.go index c8f28974f5..dc5365809b 100644 --- a/ingest/ledgerbackend/captive_core_backend.go +++ b/ingest/ledgerbackend/captive_core_backend.go @@ -112,9 +112,11 @@ type CaptiveStellarCore struct { lastLedger *uint32 // end of current segment if offline, nil if online previousLedgerHash *string - config CaptiveCoreConfig - stellarCoreClient *stellarcore.Client - captiveCoreVersion string // Updates when captive-core restarts + config CaptiveCoreConfig + captiveCoreStartDuration prometheus.Summary + captiveCoreNewDBCounter prometheus.Counter + stellarCoreClient *stellarcore.Client + captiveCoreVersion string // Updates when captive-core restarts } // CaptiveCoreConfig contains all the parameters required to create a CaptiveStellarCore instance @@ -230,7 +232,7 @@ func NewCaptive(config CaptiveCoreConfig) (*CaptiveStellarCore, error) { c.stellarCoreRunnerFactory = func() stellarCoreRunnerInterface { c.setCoreVersion() - return newStellarCoreRunner(config) + return newStellarCoreRunner(config, c.captiveCoreNewDBCounter) } if config.Toml != nil && config.Toml.HTTPPort != 0 { @@ -315,7 +317,27 @@ func (c *CaptiveStellarCore) registerMetrics(registry *prometheus.Registry, name return float64(latest) }, ) - registry.MustRegister(coreSynced, supportedProtocolVersion, latestLedger) + c.captiveCoreStartDuration = prometheus.NewSummary(prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: "ingest", + Name: "captive_stellar_core_start_duration_seconds", + Help: "duration of start up time when running captive core on an unbounded range, sliding window = 10m", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }) + 
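The switch from assert.EqualError to assert.ErrorContains in buffered_storage_backend_test.go above keeps the assertions stable now that the ledger-buffer errors carry extra context (the object name added via errors.Wrapf in ledger_buffer.go further down in this diff). A minimal sketch of the pattern, using a hypothetical object key:

```go
package example

import (
	"fmt"
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestWrappedErrorAssertions(t *testing.T) {
	base := fmt.Errorf("file does not exist")
	// Hypothetical object key; in the real code it comes from the data store request.
	objectKey := "ledgers/0-63.xdr"
	wrapped := fmt.Errorf("unable to retrieve file: %s: %w", objectKey, base)

	// An exact EqualError match would break whenever more context is wrapped in,
	// so the tests assert on the stable fragments instead.
	assert.ErrorContains(t, wrapped, "unable to retrieve file")
	assert.ErrorContains(t, wrapped, objectKey)
	assert.ErrorContains(t, wrapped, "file does not exist")
}
```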
c.captiveCoreNewDBCounter = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: "ingest", + Name: "captive_stellar_core_new_db", + Help: "counter for the number of times we start up captive core with a new buckets db, sliding window = 10m", + }) + + registry.MustRegister( + coreSynced, + supportedProtocolVersion, + latestLedger, + c.captiveCoreStartDuration, + c.captiveCoreNewDBCounter, + ) } func (c *CaptiveStellarCore) getLatestCheckpointSequence() (uint32, error) { @@ -521,12 +543,14 @@ func (c *CaptiveStellarCore) startPreparingRange(ctx context.Context, ledgerRang // Please note that using a BoundedRange, currently, requires a full-trust on // history archive. This issue is being fixed in Stellar-Core. func (c *CaptiveStellarCore) PrepareRange(ctx context.Context, ledgerRange Range) error { + startTime := time.Now() if alreadyPrepared, err := c.startPreparingRange(ctx, ledgerRange); err != nil { return errors.Wrap(err, "error starting prepare range") } else if alreadyPrepared { return nil } + var reportedStartTime bool // the prepared range might be below ledgerRange.from so we // need to seek ahead until we reach ledgerRange.from for seq := c.prepared.from; seq <= ledgerRange.from; seq++ { @@ -534,6 +558,12 @@ func (c *CaptiveStellarCore) PrepareRange(ctx context.Context, ledgerRange Range if err != nil { return errors.Wrapf(err, "Error fast-forwarding to %d", ledgerRange.from) } + if !reportedStartTime { + reportedStartTime = true + if c.captiveCoreStartDuration != nil && !ledgerRange.bounded { + c.captiveCoreStartDuration.Observe(time.Since(startTime).Seconds()) + } + } } return nil diff --git a/ingest/ledgerbackend/captive_core_backend_test.go b/ingest/ledgerbackend/captive_core_backend_test.go index f8161aec25..cb7e8dba7e 100644 --- a/ingest/ledgerbackend/captive_core_backend_test.go +++ b/ingest/ledgerbackend/captive_core_backend_test.go @@ -10,6 +10,8 @@ import ( "sync" "testing" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -224,7 +226,7 @@ func TestCaptivePrepareRange(t *testing.T) { }, nil) cancelCalled := false - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -234,6 +236,7 @@ func TestCaptivePrepareRange(t *testing.T) { cancelCalled = true }), } + captiveBackend.registerMetrics(prometheus.NewRegistry(), "test") err := captiveBackend.PrepareRange(ctx, BoundedRange(100, 200)) assert.NoError(t, err) @@ -243,6 +246,8 @@ func TestCaptivePrepareRange(t *testing.T) { assert.True(t, cancelCalled) mockRunner.AssertExpectations(t) mockArchive.AssertExpectations(t) + + assert.Equal(t, uint64(0), getStartDurationMetric(captiveBackend).GetSampleCount()) } func TestCaptivePrepareRangeCrash(t *testing.T) { @@ -263,7 +268,7 @@ func TestCaptivePrepareRangeCrash(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -302,7 +307,7 @@ func TestCaptivePrepareRangeTerminated(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { 
return mockRunner @@ -339,7 +344,7 @@ func TestCaptivePrepareRangeCloseNotFullyTerminated(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -367,7 +372,7 @@ func TestCaptivePrepareRange_ErrClosingSession(t *testing.T) { mockRunner.On("getProcessExitError").Return(nil, false) mockRunner.On("context").Return(ctx) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ nextLedger: 300, stellarCoreRunner: mockRunner, } @@ -388,7 +393,7 @@ func TestCaptivePrepareRange_ErrGettingRootHAS(t *testing.T) { On("GetRootHAS"). Return(historyarchive.HistoryArchiveState{}, errors.New("transient error")) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, } @@ -412,7 +417,7 @@ func TestCaptivePrepareRange_FromIsAheadOfRootHAS(t *testing.T) { CurrentLedger: uint32(64), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -460,7 +465,7 @@ func TestCaptivePrepareRangeWithDB_FromIsAheadOfRootHAS(t *testing.T) { CurrentLedger: uint32(64), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, useDB: true, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { @@ -499,7 +504,7 @@ func TestCaptivePrepareRange_ToIsAheadOfRootHAS(t *testing.T) { CurrentLedger: uint32(192), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -527,7 +532,7 @@ func TestCaptivePrepareRange_ErrCatchup(t *testing.T) { ctx := context.Background() cancelCalled := false - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -565,7 +570,7 @@ func TestCaptivePrepareRangeUnboundedRange_ErrRunFrom(t *testing.T) { ctx := context.Background() cancelCalled := false - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -575,10 +580,13 @@ func TestCaptivePrepareRangeUnboundedRange_ErrRunFrom(t *testing.T) { cancelCalled = true }), } + captiveBackend.registerMetrics(prometheus.NewRegistry(), "test") err := captiveBackend.PrepareRange(ctx, UnboundedRange(128)) assert.EqualError(t, err, "error starting prepare range: opening subprocess: error running stellar-core: transient error") + assert.Equal(t, uint64(0), getStartDurationMetric(captiveBackend).GetSampleCount()) + // make sure we can Close without errors assert.NoError(t, captiveBackend.Close()) assert.True(t, cancelCalled) @@ -587,6 +595,15 @@ func TestCaptivePrepareRangeUnboundedRange_ErrRunFrom(t *testing.T) { mockRunner.AssertExpectations(t) } +func getStartDurationMetric(captiveCore *CaptiveStellarCore) *dto.Summary { + value := &dto.Metric{} + err := captiveCore.captiveCoreStartDuration.Write(value) + if err != nil { + panic(err) + } + return value.GetSummary() +} + func TestCaptivePrepareRangeUnboundedRange_ReuseSession(t *testing.T) { metaChan := make(chan metaResult, 100) @@ -617,21 +634,27 @@ func 
TestCaptivePrepareRangeUnboundedRange_ReuseSession(t *testing.T) { On("GetLedgerHeader", uint32(65)). Return(xdr.LedgerHeaderHistoryEntry{}, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner }, checkpointManager: historyarchive.NewCheckpointManager(64), } + captiveBackend.registerMetrics(prometheus.NewRegistry(), "test") err := captiveBackend.PrepareRange(ctx, UnboundedRange(65)) assert.NoError(t, err) + assert.Equal(t, uint64(1), getStartDurationMetric(captiveBackend).GetSampleCount()) + assert.Greater(t, getStartDurationMetric(captiveBackend).GetSampleSum(), float64(0)) + captiveBackend.nextLedger = 64 err = captiveBackend.PrepareRange(ctx, UnboundedRange(65)) assert.NoError(t, err) + assert.Equal(t, uint64(1), getStartDurationMetric(captiveBackend).GetSampleCount()) + mockArchive.AssertExpectations(t) mockRunner.AssertExpectations(t) } @@ -665,7 +688,7 @@ func TestGetLatestLedgerSequence(t *testing.T) { On("GetLedgerHeader", uint32(64)). Return(xdr.LedgerHeaderHistoryEntry{}, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -712,7 +735,7 @@ func TestGetLatestLedgerSequenceRaceCondition(t *testing.T) { On("GetLedgerHeader", mock.Anything). Return(xdr.LedgerHeaderHistoryEntry{}, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -775,7 +798,7 @@ func TestCaptiveGetLedger(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -873,7 +896,7 @@ func TestCaptiveGetLedgerCacheLatestLedger(t *testing.T) { }, }, nil).Once() - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -928,7 +951,7 @@ func TestCaptiveGetLedger_NextLedgerIsDifferentToLedgerFromBuffer(t *testing.T) CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -978,7 +1001,7 @@ func TestCaptiveGetLedger_NextLedger0RangeFromIsSmallerThanLedgerFromBuffer(t *t On("GetLedgerHeader", uint32(65)). 
Return(xdr.LedgerHeaderHistoryEntry{}, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -1081,7 +1104,7 @@ func TestCaptiveGetLedger_ErrReadingMetaResult(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -1134,7 +1157,7 @@ func TestCaptiveGetLedger_ErrClosingAfterLastLedger(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -1176,7 +1199,7 @@ func TestCaptiveAfterClose(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -1230,7 +1253,7 @@ func TestGetLedgerBoundsCheck(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -1356,7 +1379,7 @@ func TestCaptiveGetLedgerTerminatedUnexpectedly(t *testing.T) { CurrentLedger: uint32(200), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner @@ -1410,7 +1433,7 @@ func TestCaptiveUseOfLedgerHashStore(t *testing.T) { Return("mnb", true, nil).Once() cancelCalled := false - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, ledgerHashStore: mockLedgerHashStore, checkpointManager: historyarchive.NewCheckpointManager(64), @@ -1493,7 +1516,7 @@ func TestCaptiveRunFromParams(t *testing.T) { CurrentLedger: uint32(255), }, nil) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, checkpointManager: historyarchive.NewCheckpointManager(64), } @@ -1515,7 +1538,7 @@ func TestCaptiveIsPrepared(t *testing.T) { mockRunner.On("getProcessExitError").Return(nil, false) // c.prepared == nil - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ nextLedger: 0, } @@ -1548,7 +1571,7 @@ func TestCaptiveIsPrepared(t *testing.T) { for _, tc := range tests { t.Run(fmt.Sprintf("next_%d_last_%d_cached_%d_range_%v", tc.nextLedger, tc.lastLedger, tc.cachedLedger, tc.ledgerRange), func(t *testing.T) { - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ stellarCoreRunner: mockRunner, nextLedger: tc.nextLedger, prepared: &tc.preparedRange, @@ -1579,7 +1602,7 @@ func TestCaptiveIsPreparedCoreContextCancelled(t *testing.T) { mockRunner.On("getProcessExitError").Return(nil, false) rang := UnboundedRange(100) - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ nextLedger: 100, prepared: &rang, stellarCoreRunner: mockRunner, @@ -1650,7 +1673,7 @@ func TestCaptivePreviousLedgerCheck(t *testing.T) { mockLedgerHashStore.On("GetLedgerHash", ctx, uint32(299)). 
Return("", false, nil).Once() - captiveBackend := CaptiveStellarCore{ + captiveBackend := &CaptiveStellarCore{ archive: mockArchive, stellarCoreRunnerFactory: func() stellarCoreRunnerInterface { return mockRunner diff --git a/ingest/ledgerbackend/file_watcher_test.go b/ingest/ledgerbackend/file_watcher_test.go index 7e84bbfcf2..c3e85d643f 100644 --- a/ingest/ledgerbackend/file_watcher_test.go +++ b/ingest/ledgerbackend/file_watcher_test.go @@ -65,7 +65,7 @@ func createFWFixtures(t *testing.T) (*mockHash, *stellarCoreRunner, *fileWatcher Context: context.Background(), Toml: captiveCoreToml, StoragePath: storagePath, - }) + }, nil) fw, err := newFileWatcherWithOptions(runner, ms.hashFile, time.Millisecond) assert.NoError(t, err) @@ -96,7 +96,7 @@ func TestNewFileWatcherError(t *testing.T) { Context: context.Background(), Toml: captiveCoreToml, StoragePath: storagePath, - }) + }, nil) _, err = newFileWatcherWithOptions(runner, ms.hashFile, time.Millisecond) assert.EqualError(t, err, "could not hash captive core binary: test error") diff --git a/ingest/ledgerbackend/ledger_buffer.go b/ingest/ledgerbackend/ledger_buffer.go index 6965461bba..d23bf0bfbd 100644 --- a/ingest/ledgerbackend/ledger_buffer.go +++ b/ingest/ledgerbackend/ledger_buffer.go @@ -167,7 +167,7 @@ func (lb *ledgerBuffer) downloadLedgerObject(ctx context.Context, sequence uint3 reader, err := lb.dataStore.GetFile(ctx, objectKey) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "unable to retrieve file: %s", objectKey) } defer reader.Close() diff --git a/ingest/ledgerbackend/run_from.go b/ingest/ledgerbackend/run_from.go index 2d02322519..8a424da105 100644 --- a/ingest/ledgerbackend/run_from.go +++ b/ingest/ledgerbackend/run_from.go @@ -6,32 +6,36 @@ import ( "fmt" "runtime" + "github.com/prometheus/client_golang/prometheus" + "github.com/stellar/go/protocols/stellarcore" "github.com/stellar/go/support/log" ) type runFromStream struct { - dir workingDir - from uint32 - hash string - coreCmdFactory coreCmdFactory - log *log.Entry - useDB bool + dir workingDir + from uint32 + hash string + coreCmdFactory coreCmdFactory + log *log.Entry + useDB bool + captiveCoreNewDBCounter prometheus.Counter } -func newRunFromStream(r *stellarCoreRunner, from uint32, hash string) runFromStream { +func newRunFromStream(r *stellarCoreRunner, from uint32, hash string, captiveCoreNewDBCounter prometheus.Counter) runFromStream { // We only use ephemeral directories on windows because there is // no way to terminate captive core gracefully on windows. 
// Having an ephemeral directory ensures that it is wiped out // whenever we terminate captive core dir := newWorkingDir(r, runtime.GOOS == "windows") return runFromStream{ - dir: dir, - from: from, - hash: hash, - coreCmdFactory: newCoreCmdFactory(r, dir), - log: r.log, - useDB: r.useDB, + dir: dir, + from: from, + hash: hash, + coreCmdFactory: newCoreCmdFactory(r, dir), + log: r.log, + useDB: r.useDB, + captiveCoreNewDBCounter: captiveCoreNewDBCounter, } } @@ -79,6 +83,9 @@ func (s runFromStream) start(ctx context.Context) (cmd cmdI, captiveCorePipe pip } if createNewDB { + if s.captiveCoreNewDBCounter != nil { + s.captiveCoreNewDBCounter.Inc() + } if err = s.dir.remove(); err != nil { return nil, pipe{}, fmt.Errorf("error removing existing storage-dir contents: %w", err) } diff --git a/ingest/ledgerbackend/stellar_core_runner.go b/ingest/ledgerbackend/stellar_core_runner.go index 5245051dce..4f95e94f45 100644 --- a/ingest/ledgerbackend/stellar_core_runner.go +++ b/ingest/ledgerbackend/stellar_core_runner.go @@ -7,6 +7,8 @@ import ( "math/rand" "sync" + "github.com/prometheus/client_golang/prometheus" + "github.com/stellar/go/support/log" ) @@ -67,6 +69,8 @@ type stellarCoreRunner struct { toml *CaptiveCoreToml useDB bool + captiveCoreNewDBCounter prometheus.Counter + log *log.Entry } @@ -79,7 +83,7 @@ func createRandomHexString(n int) string { return string(b) } -func newStellarCoreRunner(config CaptiveCoreConfig) *stellarCoreRunner { +func newStellarCoreRunner(config CaptiveCoreConfig, captiveCoreNewDBCounter prometheus.Counter) *stellarCoreRunner { ctx, cancel := context.WithCancel(config.Context) runner := &stellarCoreRunner{ @@ -91,7 +95,8 @@ func newStellarCoreRunner(config CaptiveCoreConfig) *stellarCoreRunner { log: config.Log, toml: config.Toml, - systemCaller: realSystemCaller{}, + captiveCoreNewDBCounter: captiveCoreNewDBCounter, + systemCaller: realSystemCaller{}, } return runner @@ -104,7 +109,7 @@ func (r *stellarCoreRunner) context() context.Context { // runFrom executes the run command with a starting ledger on the captive core subprocess func (r *stellarCoreRunner) runFrom(from uint32, hash string) error { - return r.startMetaStream(newRunFromStream(r, from, hash)) + return r.startMetaStream(newRunFromStream(r, from, hash, r.captiveCoreNewDBCounter)) } // catchup executes the catchup command on the captive core subprocess diff --git a/ingest/ledgerbackend/stellar_core_runner_test.go b/ingest/ledgerbackend/stellar_core_runner_test.go index f53cd88328..06b9c85fce 100644 --- a/ingest/ledgerbackend/stellar_core_runner_test.go +++ b/ingest/ledgerbackend/stellar_core_runner_test.go @@ -7,6 +7,8 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -26,7 +28,7 @@ func TestCloseOffline(t *testing.T) { Context: context.Background(), Toml: captiveCoreToml, StoragePath: "/tmp/captive-core", - }) + }, nil) cmdMock := simpleCommandMock() cmdMock.On("Wait").Return(nil) @@ -68,7 +70,7 @@ func TestCloseOnline(t *testing.T) { Context: context.Background(), Toml: captiveCoreToml, StoragePath: "/tmp/captive-core", - }) + }, nil) cmdMock := simpleCommandMock() cmdMock.On("Wait").Return(nil) @@ -112,7 +114,7 @@ func TestCloseOnlineWithError(t *testing.T) { Context: context.Background(), Toml: captiveCoreToml, StoragePath: "/tmp/captive-core", - }) + }, nil) cmdMock := simpleCommandMock() cmdMock.On("Wait").Return(errors.New("wait error")) @@ 
-166,7 +168,7 @@ func TestCloseConcurrency(t *testing.T) { Context: context.Background(), Toml: captiveCoreToml, StoragePath: "/tmp/captive-core", - }) + }, nil) cmdMock := simpleCommandMock() cmdMock.On("Wait").Return(errors.New("wait error")).WaitUntil(time.After(time.Millisecond * 300)) @@ -223,7 +225,7 @@ func TestRunFromUseDBLedgersMatch(t *testing.T) { Toml: captiveCoreToml, StoragePath: "/tmp/captive-core", UseDB: true, - }) + }, createNewDBCounter()) cmdMock := simpleCommandMock() cmdMock.On("Wait").Return(nil) @@ -263,6 +265,8 @@ func TestRunFromUseDBLedgersMatch(t *testing.T) { assert.NoError(t, runner.runFrom(100, "hash")) assert.NoError(t, runner.close()) + + assert.Equal(t, float64(0), getNewDBCounterMetric(runner)) } func TestRunFromUseDBLedgersBehind(t *testing.T) { @@ -279,7 +283,7 @@ func TestRunFromUseDBLedgersBehind(t *testing.T) { Toml: captiveCoreToml, StoragePath: "/tmp/captive-core", UseDB: true, - }) + }, createNewDBCounter()) newDBCmdMock := simpleCommandMock() newDBCmdMock.On("Run").Return(nil) @@ -325,6 +329,23 @@ func TestRunFromUseDBLedgersBehind(t *testing.T) { assert.NoError(t, runner.runFrom(100, "hash")) assert.NoError(t, runner.close()) + + assert.Equal(t, float64(0), getNewDBCounterMetric(runner)) +} + +func createNewDBCounter() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "test", Subsystem: "captive_core", Name: "new_db_counter", + }) +} + +func getNewDBCounterMetric(runner *stellarCoreRunner) float64 { + value := &dto.Metric{} + err := runner.captiveCoreNewDBCounter.Write(value) + if err != nil { + panic(err) + } + return value.GetCounter().GetValue() } func TestRunFromUseDBLedgersInFront(t *testing.T) { @@ -341,7 +362,7 @@ func TestRunFromUseDBLedgersInFront(t *testing.T) { Toml: captiveCoreToml, StoragePath: "/tmp/captive-core", UseDB: true, - }) + }, createNewDBCounter()) newDBCmdMock := simpleCommandMock() newDBCmdMock.On("Run").Return(nil) @@ -405,4 +426,5 @@ func TestRunFromUseDBLedgersInFront(t *testing.T) { assert.NoError(t, runner.runFrom(100, "hash")) assert.NoError(t, runner.close()) + assert.Equal(t, float64(1), getNewDBCounterMetric(runner)) } diff --git a/services/galexie/docker/Dockerfile b/services/galexie/docker/Dockerfile index 6614a40692..42ff2e43d9 100644 --- a/services/galexie/docker/Dockerfile +++ b/services/galexie/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.22-bullseye AS builder +FROM golang:1.22-bookworm AS builder WORKDIR /go/src/github.com/stellar/go diff --git a/services/horizon/CONTRIBUTING.md b/services/horizon/CONTRIBUTING.md index 26ba4d1ad0..c8c598382d 100644 --- a/services/horizon/CONTRIBUTING.md +++ b/services/horizon/CONTRIBUTING.md @@ -1,6 +1,3 @@ # How to contribute -Please read the [Contribution Guide](https://github.com/stellar/docs/blob/master/CONTRIBUTING.md). - -Then please [sign the Contributor License Agreement](https://docs.google.com/forms/d/1g7EF6PERciwn7zfmfke5Sir2n10yddGGSXyZsq98tVY/viewform?usp=send_form). - +Please read the [Contribution Guide](https://github.com/stellar/.github/blob/master/CONTRIBUTING.md). 
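The tests above read the new-DB counter directly by calling Write() into a dto.Metric; in production the counter is registered on the ingest metrics registry and surfaces in a scrape. A minimal sketch of registering and reading back such a counter through a registry — the "horizon" namespace and the help string are illustrative, since the namespace is whatever the caller passes to registerMetrics:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	registry := prometheus.NewRegistry()
	newDBCounter := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "horizon", // illustrative; set by the registerMetrics caller
		Subsystem: "ingest",
		Name:      "captive_stellar_core_new_db",
		Help:      "number of times captive core was started with a new buckets db",
	})
	registry.MustRegister(newDBCounter)

	newDBCounter.Inc() // e.g. when run_from has to recreate the storage dir

	// Read back through the registry, the way a scrape would see it.
	families, err := registry.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range families {
		fmt.Println(mf.GetName(), mf.GetMetric()[0].GetCounter().GetValue())
	}
}
```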
diff --git a/services/horizon/docker/Dockerfile.dev b/services/horizon/docker/Dockerfile.dev index 5cef8d89d1..91b0f8aeea 100644 --- a/services/horizon/docker/Dockerfile.dev +++ b/services/horizon/docker/Dockerfile.dev @@ -1,4 +1,4 @@ -FROM golang:1.22-bullseye AS builder +FROM golang:1.22-bookworm AS builder ARG VERSION="devel" WORKDIR /go/src/github.com/stellar/go diff --git a/services/horizon/docker/captive-core-integration-tests.soroban-rpc.cfg b/services/horizon/docker/captive-core-integration-tests.soroban-rpc.cfg index 8d76504de2..0ce81a7f5c 100644 --- a/services/horizon/docker/captive-core-integration-tests.soroban-rpc.cfg +++ b/services/horizon/docker/captive-core-integration-tests.soroban-rpc.cfg @@ -1,4 +1,4 @@ -EXPERIMENTAL_BUCKETLIST_DB = true +DEPRECATED_SQL_LEDGER_STATE=false PEER_PORT=11725 ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING=true diff --git a/services/horizon/internal/actions/effects.go b/services/horizon/internal/actions/effects.go index e04997aca5..d8620fc11f 100644 --- a/services/horizon/internal/actions/effects.go +++ b/services/horizon/internal/actions/effects.go @@ -51,12 +51,12 @@ type GetEffectsHandler struct { } func (handler GetEffectsHandler) GetResourcePage(w HeaderWriter, r *http.Request) ([]hal.Pageable, error) { - pq, err := GetPageQuery(handler.LedgerState, r, DefaultTOID) + pq, err := GetPageQuery(handler.LedgerState, r) if err != nil { return nil, err } - err = validateCursorWithinHistory(handler.LedgerState, pq) + err = validateAndAdjustCursor(handler.LedgerState, &pq) if err != nil { return nil, err } diff --git a/services/horizon/internal/actions/helpers.go b/services/horizon/internal/actions/helpers.go index 2575b13251..99071bfba7 100644 --- a/services/horizon/internal/actions/helpers.go +++ b/services/horizon/internal/actions/helpers.go @@ -3,6 +3,7 @@ package actions import ( "context" "encoding/hex" + "errors" "fmt" "net/http" "net/url" @@ -20,7 +21,6 @@ import ( "github.com/stellar/go/services/horizon/internal/db2" "github.com/stellar/go/services/horizon/internal/ledger" hProblem "github.com/stellar/go/services/horizon/internal/render/problem" - "github.com/stellar/go/support/errors" "github.com/stellar/go/support/ordered" "github.com/stellar/go/support/render/problem" "github.com/stellar/go/toid" @@ -45,8 +45,6 @@ type Opt int const ( // DisableCursorValidation disables cursor validation in GetPageQuery DisableCursorValidation Opt = iota - // DefaultTOID sets a default cursor value in GetPageQuery based on the ledger state - DefaultTOID Opt = iota ) // HeaderWriter is an interface for setting HTTP response headers @@ -171,7 +169,7 @@ func getLimit(r *http.Request, name string, def uint64, max uint64) (uint64, err if asI64 <= 0 { err = errors.New("invalid limit: non-positive value provided") } else if asI64 > int64(max) { - err = errors.Errorf("invalid limit: value provided that is over limit max of %d", max) + err = fmt.Errorf("invalid limit: value provided that is over limit max of %d", max) } if err != nil { @@ -185,14 +183,10 @@ func getLimit(r *http.Request, name string, def uint64, max uint64) (uint64, err // using the results from a call to GetPagingParams() func GetPageQuery(ledgerState *ledger.State, r *http.Request, opts ...Opt) (db2.PageQuery, error) { disableCursorValidation := false - defaultTOID := false for _, opt := range opts { if opt == DisableCursorValidation { disableCursorValidation = true } - if opt == DefaultTOID { - defaultTOID = true - } } cursor, err := getCursor(ledgerState, r, ParamCursor) @@ -221,13 +215,6 @@ func 
GetPageQuery(ledgerState *ledger.State, r *http.Request, opts ...Opt) (db2. return db2.PageQuery{}, err } - if cursor == "" && defaultTOID { - if pageQuery.Order == db2.OrderAscending { - pageQuery.Cursor = toid.AfterLedger( - ordered.Max(0, ledgerState.CurrentStatus().HistoryElder-1), - ).String() - } - } return pageQuery, nil } @@ -553,19 +540,37 @@ func validateAssetParams(aType, code, issuer, prefix string) error { return nil } -// validateCursorWithinHistory compares the requested page of data against the +// validateAndAdjustCursor compares the requested page of data against the // ledger state of the history database. In the event that the cursor is // guaranteed to return no results, we return a 410 GONE http response. -func validateCursorWithinHistory(ledgerState *ledger.State, pq db2.PageQuery) error { - // an ascending query should never return a gone response: An ascending query - // prior to known history should return results at the beginning of history, - // and an ascending query beyond the end of history should not error out but - // rather return an empty page (allowing code that tracks the procession of - // some resource more easily). - if pq.Order != "desc" { - return nil +// For ascending queries, we adjust the cursor to ensure it starts at +// the oldest available ledger. +func validateAndAdjustCursor(ledgerState *ledger.State, pq *db2.PageQuery) error { + err := validateCursorWithinHistory(ledgerState, *pq) + + if pq.Order == db2.OrderAscending { + // an ascending query should never return a gone response: An ascending query + // prior to known history should return results at the beginning of history, + // and an ascending query beyond the end of history should not error out but + // rather return an empty page (allowing code that tracks the procession of + // some resource more easily). + + // set/modify the cursor for ascending queries to start at the oldest available ledger if it + // precedes the oldest ledger. This avoids inefficient queries caused by index bloat from deleted rows + // that are removed as part of reaping to maintain the retention window. + if pq.Cursor == "" || errors.Is(err, &hProblem.BeforeHistory) { + pq.Cursor = toid.AfterLedger( + ordered.Max(0, ledgerState.CurrentStatus().HistoryElder-1), + ).String() + return nil + } } + return err +} +// validateCursorWithinHistory checks if the cursor is within the known history range. +// If the cursor is before the oldest available ledger, it returns BeforeHistory error. 
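A condensed illustration of the new behaviour (the helpers_test.go changes below exercise it exhaustively): with a truncated retention window whose oldest ledger is 300, an ascending query whose cursor is empty or falls before history is snapped forward to toid.AfterLedger(299), so the page starts at the oldest retained ledger instead of scanning index bloat left by reaping, while a descending query before history still gets BeforeHistory (410 Gone). A sketch, assuming the actions package context; exampleAdjustCursor is a hypothetical helper:

```go
package actions

import (
	"github.com/stellar/go/services/horizon/internal/db2"
	"github.com/stellar/go/services/horizon/internal/ledger"
	"github.com/stellar/go/toid"
)

// exampleAdjustCursor: with a retention window starting at ledger 300, an
// ascending cursor that points before history is snapped to AfterLedger(299).
func exampleAdjustCursor() (string, error) {
	ledgerState := &ledger.State{}
	ledgerState.SetHorizonStatus(ledger.HorizonStatus{
		HistoryLatest:    7000,
		HistoryElder:     300,
		ExpHistoryLatest: 7000,
	})

	pq := db2.PageQuery{Cursor: toid.AfterLedger(200).String(), Limit: 2, Order: "asc"}
	err := validateAndAdjustCursor(ledgerState, &pq)
	// err == nil; pq.Cursor is now toid.AfterLedger(299).String(), i.e. results
	// begin at ledger 300. The same cursor with Order "desc" would instead
	// return hProblem.BeforeHistory (410 Gone).
	return pq.Cursor, err
}
```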
+func validateCursorWithinHistory(ledgerState *ledger.State, pq db2.PageQuery) error { var cursor int64 var err error @@ -596,7 +601,7 @@ func countNonEmpty(params ...interface{}) (int, error) { for _, param := range params { switch param := param.(type) { default: - return 0, errors.Errorf("unexpected type %T", param) + return 0, fmt.Errorf("unexpected type %T", param) case int32: if param != 0 { count++ diff --git a/services/horizon/internal/actions/helpers_test.go b/services/horizon/internal/actions/helpers_test.go index 05e840577e..e393cc357e 100644 --- a/services/horizon/internal/actions/helpers_test.go +++ b/services/horizon/internal/actions/helpers_test.go @@ -16,6 +16,7 @@ import ( horizonContext "github.com/stellar/go/services/horizon/internal/context" "github.com/stellar/go/services/horizon/internal/db2" "github.com/stellar/go/services/horizon/internal/ledger" + hProblem "github.com/stellar/go/services/horizon/internal/render/problem" "github.com/stellar/go/services/horizon/internal/test" "github.com/stellar/go/support/db" "github.com/stellar/go/support/errors" @@ -126,11 +127,21 @@ func TestValidateCursorWithinHistory(t *testing.T) { { cursor: "0", order: "asc", - valid: true, + valid: false, }, { cursor: "0-1234", order: "asc", + valid: false, + }, + { + cursor: "1", + order: "asc", + valid: true, + }, + { + cursor: "1-1234", + order: "asc", valid: true, }, } @@ -291,11 +302,10 @@ func TestGetPageQuery(t *testing.T) { tt.Assert.Error(err) } -func TestGetPageQueryCursorDefaultTOID(t *testing.T) { - ascReq := makeTestActionRequest("/foo-bar/blah?limit=2", testURLParams()) - descReq := makeTestActionRequest("/foo-bar/blah?limit=2&order=desc", testURLParams()) - +func TestPageQueryCursorDefaultOrder(t *testing.T) { ledgerState := &ledger.State{} + + // truncated history ledgerState.SetHorizonStatus(ledger.HorizonStatus{ HistoryLatest: 7000, HistoryLatestClosedAt: time.Now(), @@ -303,30 +313,30 @@ func TestGetPageQueryCursorDefaultTOID(t *testing.T) { ExpHistoryLatest: 7000, }) - pq, err := GetPageQuery(ledgerState, ascReq, DefaultTOID) + req := makeTestActionRequest("/foo-bar/blah?limit=2", testURLParams()) + + // default asc, w/o cursor + pq, err := GetPageQuery(ledgerState, req) assert.NoError(t, err) + assert.Empty(t, pq.Cursor) + assert.NoError(t, validateAndAdjustCursor(ledgerState, &pq)) assert.Equal(t, toid.AfterLedger(299).String(), pq.Cursor) assert.Equal(t, uint64(2), pq.Limit) assert.Equal(t, "asc", pq.Order) - pq, err = GetPageQuery(ledgerState, descReq, DefaultTOID) - assert.NoError(t, err) - assert.Equal(t, "", pq.Cursor) - assert.Equal(t, uint64(2), pq.Limit) - assert.Equal(t, "desc", pq.Order) + cursor := toid.AfterLedger(200).String() + reqWithCursor := makeTestActionRequest(fmt.Sprintf("/foo-bar/blah?cursor=%s&limit=2", cursor), testURLParams()) - pq, err = GetPageQuery(ledgerState, ascReq) + // default asc, w/ cursor + pq, err = GetPageQuery(ledgerState, reqWithCursor) assert.NoError(t, err) - assert.Empty(t, pq.Cursor) + assert.Equal(t, cursor, pq.Cursor) + assert.NoError(t, validateAndAdjustCursor(ledgerState, &pq)) + assert.Equal(t, toid.AfterLedger(299).String(), pq.Cursor) assert.Equal(t, uint64(2), pq.Limit) assert.Equal(t, "asc", pq.Order) - pq, err = GetPageQuery(ledgerState, descReq) - assert.NoError(t, err) - assert.Empty(t, pq.Cursor) - assert.Equal(t, "", pq.Cursor) - assert.Equal(t, "desc", pq.Order) - + // full history ledgerState.SetHorizonStatus(ledger.HorizonStatus{ HistoryLatest: 7000, HistoryLatestClosedAt: time.Now(), @@ -334,18 +344,130 @@ 
func TestGetPageQueryCursorDefaultTOID(t *testing.T) { ExpHistoryLatest: 7000, }) - pq, err = GetPageQuery(ledgerState, ascReq, DefaultTOID) + // default asc, w/o cursor + pq, err = GetPageQuery(ledgerState, req) assert.NoError(t, err) + assert.Empty(t, pq.Cursor) + assert.NoError(t, validateAndAdjustCursor(ledgerState, &pq)) assert.Equal(t, toid.AfterLedger(0).String(), pq.Cursor) assert.Equal(t, uint64(2), pq.Limit) assert.Equal(t, "asc", pq.Order) - pq, err = GetPageQuery(ledgerState, descReq, DefaultTOID) + // default asc, w/ cursor + pq, err = GetPageQuery(ledgerState, reqWithCursor) assert.NoError(t, err) - assert.Equal(t, "", pq.Cursor) + assert.Equal(t, cursor, pq.Cursor) + assert.NoError(t, validateAndAdjustCursor(ledgerState, &pq)) + assert.Equal(t, cursor, pq.Cursor) assert.Equal(t, uint64(2), pq.Limit) - assert.Equal(t, "desc", pq.Order) + assert.Equal(t, "asc", pq.Order) + +} + +func TestGetPageQueryWithoutCursor(t *testing.T) { + ledgerState := &ledger.State{} + + validateCursor := func(limit uint64, order string, expectedCursor string) { + req := makeTestActionRequest(fmt.Sprintf("/foo-bar/blah?limit=%d&order=%s", limit, order), testURLParams()) + pq, err := GetPageQuery(ledgerState, req) + assert.NoError(t, err) + assert.Empty(t, pq.Cursor) + assert.NoError(t, validateAndAdjustCursor(ledgerState, &pq)) + assert.Equal(t, expectedCursor, pq.Cursor) + assert.Equal(t, limit, pq.Limit) + assert.Equal(t, order, pq.Order) + } + + // truncated history + ledgerState.SetHorizonStatus(ledger.HorizonStatus{ + HistoryLatest: 7000, + HistoryLatestClosedAt: time.Now(), + HistoryElder: 300, + ExpHistoryLatest: 7000, + }) + + validateCursor(2, "asc", toid.AfterLedger(299).String()) + validateCursor(2, "desc", "") + + // full history + ledgerState.SetHorizonStatus(ledger.HorizonStatus{ + HistoryLatest: 7000, + HistoryLatestClosedAt: time.Now(), + HistoryElder: 0, + ExpHistoryLatest: 7000, + }) + + validateCursor(2, "asc", toid.AfterLedger(0).String()) + validateCursor(2, "desc", "") +} + +func TestValidateAndAdjustCursor(t *testing.T) { + ledgerState := &ledger.State{} + + validateCursor := func(cursor string, limit uint64, order string, expectedCursor string, expectedError error) { + pq := db2.PageQuery{Cursor: cursor, + Limit: limit, + Order: order, + } + err := validateAndAdjustCursor(ledgerState, &pq) + if expectedError != nil { + assert.EqualError(t, expectedError, err.Error()) + } else { + assert.NoError(t, err) + } + assert.Equal(t, expectedCursor, pq.Cursor) + assert.Equal(t, limit, pq.Limit) + assert.Equal(t, order, pq.Order) + } + + // full history + ledgerState.SetHorizonStatus(ledger.HorizonStatus{ + HistoryLatest: 7000, + HistoryLatestClosedAt: time.Now(), + HistoryElder: 0, + ExpHistoryLatest: 7000, + }) + + // invalid cursor + validateCursor("blah", 2, "asc", "blah", problem.BadRequest) + validateCursor("blah", 2, "desc", "blah", problem.BadRequest) + + validateCursor(toid.AfterLedger(0).String(), 2, "asc", toid.AfterLedger(0).String(), nil) + validateCursor(toid.AfterLedger(200).String(), 2, "asc", toid.AfterLedger(200).String(), nil) + validateCursor(toid.AfterLedger(7001).String(), 2, "asc", toid.AfterLedger(7001).String(), nil) + + validateCursor(toid.AfterLedger(0).String(), 2, "desc", toid.AfterLedger(0).String(), nil) + validateCursor(toid.AfterLedger(200).String(), 2, "desc", toid.AfterLedger(200).String(), nil) + validateCursor(toid.AfterLedger(7001).String(), 2, "desc", toid.AfterLedger(7001).String(), nil) + + // truncated history + 
ledgerState.SetHorizonStatus(ledger.HorizonStatus{ + HistoryLatest: 7000, + HistoryLatestClosedAt: time.Now(), + HistoryElder: 300, + ExpHistoryLatest: 7000, + }) + // invalid cursor + validateCursor("blah", 2, "asc", "blah", problem.BadRequest) + validateCursor("blah", 2, "desc", "blah", problem.BadRequest) + + // asc order + validateCursor(toid.AfterLedger(0).String(), 2, "asc", toid.AfterLedger(299).String(), nil) + validateCursor(toid.AfterLedger(200).String(), 2, "asc", toid.AfterLedger(299).String(), nil) + validateCursor(toid.AfterLedger(298).String(), 2, "asc", toid.AfterLedger(299).String(), nil) + validateCursor(toid.AfterLedger(299).String(), 2, "asc", toid.AfterLedger(299).String(), nil) + validateCursor(toid.AfterLedger(300).String(), 2, "asc", toid.AfterLedger(300).String(), nil) + validateCursor(toid.AfterLedger(301).String(), 2, "asc", toid.AfterLedger(301).String(), nil) + validateCursor(toid.AfterLedger(7001).String(), 2, "asc", toid.AfterLedger(7001).String(), nil) + + // desc order + validateCursor(toid.AfterLedger(0).String(), 2, "desc", toid.AfterLedger(0).String(), hProblem.BeforeHistory) + validateCursor(toid.AfterLedger(200).String(), 2, "desc", toid.AfterLedger(200).String(), hProblem.BeforeHistory) + validateCursor(toid.AfterLedger(299).String(), 2, "desc", toid.AfterLedger(299).String(), hProblem.BeforeHistory) + validateCursor(toid.AfterLedger(300).String(), 2, "desc", toid.AfterLedger(300).String(), nil) + validateCursor(toid.AfterLedger(320).String(), 2, "desc", toid.AfterLedger(320).String(), nil) + validateCursor(toid.AfterLedger(7001).String(), 2, "desc", toid.AfterLedger(7001).String(), nil) } func TestGetString(t *testing.T) { diff --git a/services/horizon/internal/actions/ledger.go b/services/horizon/internal/actions/ledger.go index 0b3d51b8e1..0836ba9b10 100644 --- a/services/horizon/internal/actions/ledger.go +++ b/services/horizon/internal/actions/ledger.go @@ -17,12 +17,12 @@ type GetLedgersHandler struct { } func (handler GetLedgersHandler) GetResourcePage(w HeaderWriter, r *http.Request) ([]hal.Pageable, error) { - pq, err := GetPageQuery(handler.LedgerState, r, DefaultTOID) + pq, err := GetPageQuery(handler.LedgerState, r) if err != nil { return nil, err } - err = validateCursorWithinHistory(handler.LedgerState, pq) + err = validateAndAdjustCursor(handler.LedgerState, &pq) if err != nil { return nil, err } diff --git a/services/horizon/internal/actions/operation.go b/services/horizon/internal/actions/operation.go index 9ccadb272e..6b200cba29 100644 --- a/services/horizon/internal/actions/operation.go +++ b/services/horizon/internal/actions/operation.go @@ -72,12 +72,12 @@ type GetOperationsHandler struct { func (handler GetOperationsHandler) GetResourcePage(w HeaderWriter, r *http.Request) ([]hal.Pageable, error) { ctx := r.Context() - pq, err := GetPageQuery(handler.LedgerState, r, DefaultTOID) + pq, err := GetPageQuery(handler.LedgerState, r) if err != nil { return nil, err } - err = validateCursorWithinHistory(handler.LedgerState, pq) + err = validateAndAdjustCursor(handler.LedgerState, &pq) if err != nil { return nil, err } diff --git a/services/horizon/internal/actions/trade.go b/services/horizon/internal/actions/trade.go index b29ad64715..57d97593cf 100644 --- a/services/horizon/internal/actions/trade.go +++ b/services/horizon/internal/actions/trade.go @@ -159,12 +159,12 @@ type GetTradesHandler struct { func (handler GetTradesHandler) GetResourcePage(w HeaderWriter, r *http.Request) ([]hal.Pageable, error) { ctx := r.Context() - pq, err := 
GetPageQuery(handler.LedgerState, r, DefaultTOID) + pq, err := GetPageQuery(handler.LedgerState, r) if err != nil { return nil, err } - err = validateCursorWithinHistory(handler.LedgerState, pq) + err = validateAndAdjustCursor(handler.LedgerState, &pq) if err != nil { return nil, err } @@ -287,10 +287,7 @@ func (handler GetTradeAggregationsHandler) GetResource(w HeaderWriter, r *http.R if err != nil { return nil, err } - err = validateCursorWithinHistory(handler.LedgerState, pq) - if err != nil { - return nil, err - } + qp := TradeAggregationsQuery{} if err = getParams(&qp, r); err != nil { return nil, err diff --git a/services/horizon/internal/actions/transaction.go b/services/horizon/internal/actions/transaction.go index 6a2fd1c6e2..ed579f9d6f 100644 --- a/services/horizon/internal/actions/transaction.go +++ b/services/horizon/internal/actions/transaction.go @@ -98,12 +98,12 @@ type GetTransactionsHandler struct { func (handler GetTransactionsHandler) GetResourcePage(w HeaderWriter, r *http.Request) ([]hal.Pageable, error) { ctx := r.Context() - pq, err := GetPageQuery(handler.LedgerState, r, DefaultTOID) + pq, err := GetPageQuery(handler.LedgerState, r) if err != nil { return nil, err } - err = validateCursorWithinHistory(handler.LedgerState, pq) + err = validateAndAdjustCursor(handler.LedgerState, &pq) if err != nil { return nil, err } diff --git a/services/horizon/internal/db2/history/account_loader.go b/services/horizon/internal/db2/history/account_loader.go index e7e7e90854..f69e7d7f48 100644 --- a/services/horizon/internal/db2/history/account_loader.go +++ b/services/horizon/internal/db2/history/account_loader.go @@ -1,220 +1,47 @@ package history import ( - "context" - "database/sql/driver" - "fmt" - "sort" - "strings" - - "github.com/lib/pq" + "cmp" "github.com/stellar/go/support/collections/set" - "github.com/stellar/go/support/db" - "github.com/stellar/go/support/errors" - "github.com/stellar/go/support/ordered" ) // FutureAccountID represents a future history account. // A FutureAccountID is created by an AccountLoader and // the account id is available after calling Exec() on // the AccountLoader. -type FutureAccountID struct { - address string - loader *AccountLoader -} - -const loaderLookupBatchSize = 50000 - -// Value implements the database/sql/driver Valuer interface. -func (a FutureAccountID) Value() (driver.Value, error) { - return a.loader.GetNow(a.address) -} +type FutureAccountID = future[string, Account] // AccountLoader will map account addresses to their history // account ids. If there is no existing mapping for a given address, // the AccountLoader will insert into the history_accounts table to // establish a mapping. -type AccountLoader struct { - sealed bool - set set.Set[string] - ids map[string]int64 - stats LoaderStats -} - -var errSealed = errors.New("cannot register more entries to loader after calling Exec()") +type AccountLoader = loader[string, Account] // NewAccountLoader will construct a new AccountLoader instance. -func NewAccountLoader() *AccountLoader { +func NewAccountLoader(concurrencyMode ConcurrencyMode) *AccountLoader { return &AccountLoader{ sealed: false, set: set.Set[string]{}, ids: map[string]int64{}, stats: LoaderStats{}, - } -} - -// GetFuture registers the given account address into the loader and -// returns a FutureAccountID which will hold the history account id for -// the address after Exec() is called. 
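For orientation while reading the removals below: the loader's call pattern is unchanged by this refactor — register futures, call Exec once, then resolve ids — only the implementation moves into a shared generic type and the constructor now takes a ConcurrencyMode. A sketch of the flow as the tests further down use it (resolveAccountID is a hypothetical helper, written as if inside the history package):

```go
package history

import (
	"context"

	"github.com/stellar/go/support/db"
)

// resolveAccountID sketches the loader flow: register futures, Exec once,
// then resolve ids. ConcurrentInserts is one of the new ConcurrencyMode values.
func resolveAccountID(ctx context.Context, session db.SessionInterface, address string) (int64, error) {
	loader := NewAccountLoader(ConcurrentInserts)

	// Register the address; its history id is unknown until Exec() runs.
	// The returned FutureAccountID can also be used directly as a sql Valuer.
	_ = loader.GetFuture(address)

	// Look up existing history_accounts rows and insert any that are missing.
	if err := loader.Exec(ctx, session); err != nil {
		return 0, err
	}

	// The loader is now sealed; ids resolve synchronously.
	return loader.GetNow(address)
}
```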
-func (a *AccountLoader) GetFuture(address string) FutureAccountID { - if a.sealed { - panic(errSealed) - } - - a.set.Add(address) - return FutureAccountID{ - address: address, - loader: a, - } -} - -// GetNow returns the history account id for the given address. -// GetNow should only be called on values which were registered by -// GetFuture() calls. Also, Exec() must be called before any GetNow -// call can succeed. -func (a *AccountLoader) GetNow(address string) (int64, error) { - if !a.sealed { - return 0, fmt.Errorf(`invalid account loader state, - Exec was not called yet to properly seal and resolve %v id`, address) - } - if internalID, ok := a.ids[address]; !ok { - return 0, fmt.Errorf(`account loader address %q was not found`, address) - } else { - return internalID, nil - } -} - -func (a *AccountLoader) lookupKeys(ctx context.Context, q *Q, addresses []string) error { - for i := 0; i < len(addresses); i += loaderLookupBatchSize { - end := ordered.Min(len(addresses), i+loaderLookupBatchSize) - - var accounts []Account - if err := q.AccountsByAddresses(ctx, &accounts, addresses[i:end]); err != nil { - return errors.Wrap(err, "could not select accounts") - } - - for _, account := range accounts { - a.ids[account.Address] = account.ID - } - } - return nil -} - -// LoaderStats describes the result of executing a history lookup id loader -type LoaderStats struct { - // Total is the number of elements registered to the loader - Total int - // Inserted is the number of elements inserted into the lookup table - Inserted int -} - -// Exec will look up all the history account ids for the addresses registered in the loader. -// If there are no history account ids for a given set of addresses, Exec will insert rows -// into the history_accounts table to establish a mapping between address and history account id. -func (a *AccountLoader) Exec(ctx context.Context, session db.SessionInterface) error { - a.sealed = true - if len(a.set) == 0 { - return nil - } - q := &Q{session} - addresses := make([]string, 0, len(a.set)) - for address := range a.set { - addresses = append(addresses, address) - } - - if err := a.lookupKeys(ctx, q, addresses); err != nil { - return err - } - a.stats.Total += len(addresses) - - insert := 0 - for _, address := range addresses { - if _, ok := a.ids[address]; ok { - continue - } - addresses[insert] = address - insert++ - } - if insert == 0 { - return nil - } - addresses = addresses[:insert] - // sort entries before inserting rows to prevent deadlocks on acquiring a ShareLock - // https://github.com/stellar/go/issues/2370 - sort.Strings(addresses) - - err := bulkInsert( - ctx, - q, - "history_accounts", - []string{"address"}, - []bulkInsertField{ - { - name: "address", - dbType: "character varying(64)", - objects: addresses, - }, + name: "AccountLoader", + table: "history_accounts", + columnsForKeys: func(addresses []string) []columnValues { + return []columnValues{ + { + name: "address", + dbType: "character varying(64)", + objects: addresses, + }, + } }, - ) - if err != nil { - return err - } - a.stats.Inserted += insert - - return a.lookupKeys(ctx, q, addresses) -} - -// Stats returns the number of addresses registered in the loader and the number of addresses -// inserted into the history_accounts table. 
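The shared generic loader itself is defined outside this diff; the constructor literal above implies roughly the following shape. Field names are read off that literal — this is an illustrative approximation, not the actual definition:

```go
package history

import "github.com/stellar/go/support/collections/set"

// loaderSketch approximates the generic type that AccountLoader and
// AssetLoader now alias (loader[K, T]); the real definition lives elsewhere.
type loaderSketch[K comparable, T any] struct {
	sealed          bool
	set             set.Set[K]
	ids             map[K]int64
	stats           LoaderStats
	name            string                   // e.g. "AccountLoader"
	table           string                   // e.g. "history_accounts"
	columnsForKeys  func([]K) []columnValues // map keys to bulk-insert columns
	mappingFromRow  func(T) (K, int64)       // map a fetched row back to (key, id)
	less            func(K, K) bool          // sort order before inserts (deadlock avoidance)
	concurrencyMode ConcurrencyMode
}
```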
-func (a *AccountLoader) Stats() LoaderStats { - return a.stats -} - -func (a *AccountLoader) Name() string { - return "AccountLoader" -} - -type bulkInsertField struct { - name string - dbType string - objects []string -} - -func bulkInsert(ctx context.Context, q *Q, table string, conflictFields []string, fields []bulkInsertField) error { - unnestPart := make([]string, 0, len(fields)) - insertFieldsPart := make([]string, 0, len(fields)) - pqArrays := make([]interface{}, 0, len(fields)) - - for _, field := range fields { - unnestPart = append( - unnestPart, - fmt.Sprintf("unnest(?::%s[]) /* %s */", field.dbType, field.name), - ) - insertFieldsPart = append( - insertFieldsPart, - field.name, - ) - pqArrays = append( - pqArrays, - pq.Array(field.objects), - ) + mappingFromRow: func(account Account) (string, int64) { + return account.Address, account.ID + }, + less: cmp.Less[string], + concurrencyMode: concurrencyMode, } - - sql := ` - WITH r AS - (SELECT ` + strings.Join(unnestPart, ",") + `) - INSERT INTO ` + table + ` - (` + strings.Join(insertFieldsPart, ",") + `) - SELECT * from r - ON CONFLICT (` + strings.Join(conflictFields, ",") + `) DO NOTHING` - - _, err := q.ExecRaw( - context.WithValue(ctx, &db.QueryTypeContextKey, db.UpsertQueryType), - sql, - pqArrays..., - ) - return err } // AccountLoaderStub is a stub wrapper around AccountLoader which allows @@ -225,7 +52,7 @@ type AccountLoaderStub struct { // NewAccountLoaderStub returns a new AccountLoaderStub instance func NewAccountLoaderStub() AccountLoaderStub { - return AccountLoaderStub{Loader: NewAccountLoader()} + return AccountLoaderStub{Loader: NewAccountLoader(ConcurrentInserts)} } // Insert updates the wrapped AccountLoader so that the given account diff --git a/services/horizon/internal/db2/history/account_loader_test.go b/services/horizon/internal/db2/history/account_loader_test.go index ed30b43bd9..83b172b40b 100644 --- a/services/horizon/internal/db2/history/account_loader_test.go +++ b/services/horizon/internal/db2/history/account_loader_test.go @@ -8,6 +8,7 @@ import ( "github.com/stellar/go/keypair" "github.com/stellar/go/services/horizon/internal/test" + "github.com/stellar/go/support/db" ) func TestAccountLoader(t *testing.T) { @@ -16,17 +17,23 @@ func TestAccountLoader(t *testing.T) { test.ResetHorizonDB(t, tt.HorizonDB) session := tt.HorizonSession() + testAccountLoader(t, session, ConcurrentInserts) + test.ResetHorizonDB(t, tt.HorizonDB) + testAccountLoader(t, session, ConcurrentDeletes) +} + +func testAccountLoader(t *testing.T, session *db.Session, mode ConcurrencyMode) { var addresses []string for i := 0; i < 100; i++ { addresses = append(addresses, keypair.MustRandom().Address()) } - loader := NewAccountLoader() + loader := NewAccountLoader(mode) for _, address := range addresses { future := loader.GetFuture(address) _, err := future.Value() assert.Error(t, err) - assert.Contains(t, err.Error(), `invalid account loader state,`) + assert.Contains(t, err.Error(), `invalid loader state,`) duplicateFuture := loader.GetFuture(address) assert.Equal(t, future, duplicateFuture) } @@ -55,4 +62,34 @@ func TestAccountLoader(t *testing.T) { _, err = loader.GetNow("not present") assert.Error(t, err) assert.Contains(t, err.Error(), `was not found`) + + // check that Loader works when all the previous values are already + // present in the db and also add 10 more rows to insert + loader = NewAccountLoader(mode) + for i := 0; i < 10; i++ { + addresses = append(addresses, keypair.MustRandom().Address()) + } + + for _, address 
:= range addresses { + future := loader.GetFuture(address) + _, err = future.Value() + assert.Error(t, err) + assert.Contains(t, err.Error(), `invalid loader state,`) + } + + assert.NoError(t, loader.Exec(context.Background(), session)) + assert.Equal(t, LoaderStats{ + Total: 110, + Inserted: 10, + }, loader.Stats()) + + for _, address := range addresses { + var internalId int64 + internalId, err = loader.GetNow(address) + assert.NoError(t, err) + var account Account + assert.NoError(t, q.AccountByAddress(context.Background(), &account, address)) + assert.Equal(t, account.ID, internalId) + assert.Equal(t, account.Address, address) + } } diff --git a/services/horizon/internal/db2/history/asset_loader.go b/services/horizon/internal/db2/history/asset_loader.go index fe17dc17be..33c5c333dd 100644 --- a/services/horizon/internal/db2/history/asset_loader.go +++ b/services/horizon/internal/db2/history/asset_loader.go @@ -1,16 +1,9 @@ package history import ( - "context" - "database/sql/driver" - "fmt" - "sort" "strings" "github.com/stellar/go/support/collections/set" - "github.com/stellar/go/support/db" - "github.com/stellar/go/support/errors" - "github.com/stellar/go/support/ordered" "github.com/stellar/go/xdr" ) @@ -40,180 +33,63 @@ func AssetKeyFromXDR(asset xdr.Asset) AssetKey { // A FutureAssetID is created by an AssetLoader and // the asset id is available after calling Exec() on // the AssetLoader. -type FutureAssetID struct { - asset AssetKey - loader *AssetLoader -} - -// Value implements the database/sql/driver Valuer interface. -func (a FutureAssetID) Value() (driver.Value, error) { - return a.loader.GetNow(a.asset) -} +type FutureAssetID = future[AssetKey, Asset] // AssetLoader will map assets to their history // asset ids. If there is no existing mapping for a given sset, // the AssetLoader will insert into the history_assets table to // establish a mapping. -type AssetLoader struct { - sealed bool - set set.Set[AssetKey] - ids map[AssetKey]int64 - stats LoaderStats -} +type AssetLoader = loader[AssetKey, Asset] // NewAssetLoader will construct a new AssetLoader instance. -func NewAssetLoader() *AssetLoader { +func NewAssetLoader(concurrencyMode ConcurrencyMode) *AssetLoader { return &AssetLoader{ sealed: false, set: set.Set[AssetKey]{}, ids: map[AssetKey]int64{}, stats: LoaderStats{}, - } -} - -// GetFuture registers the given asset into the loader and -// returns a FutureAssetID which will hold the history asset id for -// the asset after Exec() is called. -func (a *AssetLoader) GetFuture(asset AssetKey) FutureAssetID { - if a.sealed { - panic(errSealed) - } - a.set.Add(asset) - return FutureAssetID{ - asset: asset, - loader: a, - } -} - -// GetNow returns the history asset id for the given asset. -// GetNow should only be called on values which were registered by -// GetFuture() calls. Also, Exec() must be called before any GetNow -// call can succeed. 
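The loader key for assets is the AssetKey produced by AssetKeyFromXDR (referenced in the hunk context above). Building one is slightly fiddly because alphanum codes are fixed-width byte arrays; a small illustrative sketch with a random issuer (exampleAssetKey is a hypothetical helper):

```go
package history

import (
	"github.com/stellar/go/keypair"
	"github.com/stellar/go/xdr"
)

// exampleAssetKey builds the AssetKey the loader uses as its map key
// for a 4-character credit asset. Illustrative only.
func exampleAssetKey() AssetKey {
	issuer := keypair.MustRandom().Address() // hypothetical issuer

	var code [4]byte // AlphaNum4 codes are fixed-width and zero-padded
	copy(code[:], "USDC")

	asset := xdr.Asset{
		Type: xdr.AssetTypeAssetTypeCreditAlphanum4,
		AlphaNum4: &xdr.AlphaNum4{
			AssetCode: code,
			Issuer:    xdr.MustAddress(issuer),
		},
	}
	// The returned key's Type, Code and Issuer mirror the XDR asset.
	return AssetKeyFromXDR(asset)
}
```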
-func (a *AssetLoader) GetNow(asset AssetKey) (int64, error) { - if !a.sealed { - return 0, fmt.Errorf(`invalid asset loader state, - Exec was not called yet to properly seal and resolve %v id`, asset) - } - if internalID, ok := a.ids[asset]; !ok { - return 0, fmt.Errorf(`asset loader id %v was not found`, asset) - } else { - return internalID, nil - } -} - -func (a *AssetLoader) lookupKeys(ctx context.Context, q *Q, keys []AssetKey) error { - var rows []Asset - for i := 0; i < len(keys); i += loaderLookupBatchSize { - end := ordered.Min(len(keys), i+loaderLookupBatchSize) - subset := keys[i:end] - args := make([]interface{}, 0, 3*len(subset)) - placeHolders := make([]string, 0, len(subset)) - for _, key := range subset { - args = append(args, key.Code, key.Type, key.Issuer) - placeHolders = append(placeHolders, "(?, ?, ?)") - } - rawSQL := fmt.Sprintf( - "SELECT * FROM history_assets WHERE (asset_code, asset_type, asset_issuer) in (%s)", - strings.Join(placeHolders, ", "), - ) - err := q.SelectRaw(ctx, &rows, rawSQL, args...) - if err != nil { - return errors.Wrap(err, "could not select assets") - } - - for _, row := range rows { - a.ids[AssetKey{ - Type: row.Type, - Code: row.Code, - Issuer: row.Issuer, - }] = row.ID - } - } - return nil -} - -// Exec will look up all the history asset ids for the assets registered in the loader. -// If there are no history asset ids for a given set of assets, Exec will insert rows -// into the history_assets table. -func (a *AssetLoader) Exec(ctx context.Context, session db.SessionInterface) error { - a.sealed = true - if len(a.set) == 0 { - return nil - } - q := &Q{session} - keys := make([]AssetKey, 0, len(a.set)) - for key := range a.set { - keys = append(keys, key) - } - - if err := a.lookupKeys(ctx, q, keys); err != nil { - return err - } - a.stats.Total += len(keys) - - assetTypes := make([]string, 0, len(a.set)-len(a.ids)) - assetCodes := make([]string, 0, len(a.set)-len(a.ids)) - assetIssuers := make([]string, 0, len(a.set)-len(a.ids)) - // sort entries before inserting rows to prevent deadlocks on acquiring a ShareLock - // https://github.com/stellar/go/issues/2370 - sort.Slice(keys, func(i, j int) bool { - return keys[i].String() < keys[j].String() - }) - insert := 0 - for _, key := range keys { - if _, ok := a.ids[key]; ok { - continue - } - assetTypes = append(assetTypes, key.Type) - assetCodes = append(assetCodes, key.Code) - assetIssuers = append(assetIssuers, key.Issuer) - keys[insert] = key - insert++ - } - if insert == 0 { - return nil - } - keys = keys[:insert] - - err := bulkInsert( - ctx, - q, - "history_assets", - []string{"asset_code", "asset_type", "asset_issuer"}, - []bulkInsertField{ - { - name: "asset_code", - dbType: "character varying(12)", - objects: assetCodes, - }, - { - name: "asset_issuer", - dbType: "character varying(56)", - objects: assetIssuers, - }, - { - name: "asset_type", - dbType: "character varying(64)", - objects: assetTypes, - }, + name: "AssetLoader", + table: "history_assets", + columnsForKeys: func(keys []AssetKey) []columnValues { + assetTypes := make([]string, 0, len(keys)) + assetCodes := make([]string, 0, len(keys)) + assetIssuers := make([]string, 0, len(keys)) + for _, key := range keys { + assetTypes = append(assetTypes, key.Type) + assetCodes = append(assetCodes, key.Code) + assetIssuers = append(assetIssuers, key.Issuer) + } + + return []columnValues{ + { + name: "asset_code", + dbType: "character varying(12)", + objects: assetCodes, + }, + { + name: "asset_type", + dbType: "character 
varying(64)", + objects: assetTypes, + }, + { + name: "asset_issuer", + dbType: "character varying(56)", + objects: assetIssuers, + }, + } + }, + mappingFromRow: func(asset Asset) (AssetKey, int64) { + return AssetKey{ + Type: asset.Type, + Code: asset.Code, + Issuer: asset.Issuer, + }, asset.ID }, - ) - if err != nil { - return err + less: func(a AssetKey, b AssetKey) bool { + return a.String() < b.String() + }, + concurrencyMode: concurrencyMode, } - a.stats.Inserted += insert - - return a.lookupKeys(ctx, q, keys) -} - -// Stats returns the number of assets registered in the loader and the number of assets -// inserted into the history_assets table. -func (a *AssetLoader) Stats() LoaderStats { - return a.stats -} - -func (a *AssetLoader) Name() string { - return "AssetLoader" } // AssetLoaderStub is a stub wrapper around AssetLoader which allows @@ -224,7 +100,7 @@ type AssetLoaderStub struct { // NewAssetLoaderStub returns a new AssetLoaderStub instance func NewAssetLoaderStub() AssetLoaderStub { - return AssetLoaderStub{Loader: NewAssetLoader()} + return AssetLoaderStub{Loader: NewAssetLoader(ConcurrentInserts)} } // Insert updates the wrapped AssetLoaderStub so that the given asset diff --git a/services/horizon/internal/db2/history/asset_loader_test.go b/services/horizon/internal/db2/history/asset_loader_test.go index f097561e4a..e7a0495cad 100644 --- a/services/horizon/internal/db2/history/asset_loader_test.go +++ b/services/horizon/internal/db2/history/asset_loader_test.go @@ -9,6 +9,7 @@ import ( "github.com/stellar/go/keypair" "github.com/stellar/go/services/horizon/internal/test" + "github.com/stellar/go/support/db" "github.com/stellar/go/xdr" ) @@ -40,6 +41,12 @@ func TestAssetLoader(t *testing.T) { test.ResetHorizonDB(t, tt.HorizonDB) session := tt.HorizonSession() + testAssetLoader(t, session, ConcurrentInserts) + test.ResetHorizonDB(t, tt.HorizonDB) + testAssetLoader(t, session, ConcurrentDeletes) +} + +func testAssetLoader(t *testing.T, session *db.Session, mode ConcurrencyMode) { var keys []AssetKey for i := 0; i < 100; i++ { var key AssetKey @@ -66,12 +73,12 @@ func TestAssetLoader(t *testing.T) { keys = append(keys, key) } - loader := NewAssetLoader() + loader := NewAssetLoader(mode) for _, key := range keys { future := loader.GetFuture(key) _, err := future.Value() assert.Error(t, err) - assert.Contains(t, err.Error(), `invalid asset loader state,`) + assert.Contains(t, err.Error(), `invalid loader state,`) duplicateFuture := loader.GetFuture(key) assert.Equal(t, future, duplicateFuture) } @@ -106,4 +113,58 @@ func TestAssetLoader(t *testing.T) { _, err = loader.GetNow(AssetKey{}) assert.Error(t, err) assert.Contains(t, err.Error(), `was not found`) + + // check that Loader works when all the previous values are already + // present in the db and also add 10 more rows to insert + loader = NewAssetLoader(mode) + for i := 0; i < 10; i++ { + var key AssetKey + if i%2 == 0 { + code := [4]byte{0, 0, 0, 0} + copy(code[:], fmt.Sprintf("ab%d", i)) + key = AssetKeyFromXDR(xdr.Asset{ + Type: xdr.AssetTypeAssetTypeCreditAlphanum4, + AlphaNum4: &xdr.AlphaNum4{ + AssetCode: code, + Issuer: xdr.MustAddress(keypair.MustRandom().Address())}}) + } else { + code := [12]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + copy(code[:], fmt.Sprintf("abcdef%d", i)) + key = AssetKeyFromXDR(xdr.Asset{ + Type: xdr.AssetTypeAssetTypeCreditAlphanum12, + AlphaNum12: &xdr.AlphaNum12{ + AssetCode: code, + Issuer: xdr.MustAddress(keypair.MustRandom().Address())}}) + + } + keys = append(keys, key) + } + + for 
_, key := range keys { + future := loader.GetFuture(key) + _, err = future.Value() + assert.Error(t, err) + assert.Contains(t, err.Error(), `invalid loader state,`) + } + assert.NoError(t, loader.Exec(context.Background(), session)) + assert.Equal(t, LoaderStats{ + Total: 110, + Inserted: 10, + }, loader.Stats()) + + for _, key := range keys { + var internalID int64 + internalID, err = loader.GetNow(key) + assert.NoError(t, err) + var assetXDR xdr.Asset + if key.Type == "native" { + assetXDR = xdr.MustNewNativeAsset() + } else { + assetXDR = xdr.MustNewCreditAsset(key.Code, key.Issuer) + } + var assetID int64 + assetID, err = q.GetAssetID(context.Background(), assetXDR) + assert.NoError(t, err) + assert.Equal(t, assetID, internalID) + } } diff --git a/services/horizon/internal/db2/history/claimable_balance_loader.go b/services/horizon/internal/db2/history/claimable_balance_loader.go index ef18683cb6..9107d4fb9f 100644 --- a/services/horizon/internal/db2/history/claimable_balance_loader.go +++ b/services/horizon/internal/db2/history/claimable_balance_loader.go @@ -1,161 +1,45 @@ package history import ( - "context" - "database/sql/driver" - "fmt" - "sort" + "cmp" "github.com/stellar/go/support/collections/set" - "github.com/stellar/go/support/db" - "github.com/stellar/go/support/errors" - "github.com/stellar/go/support/ordered" ) // FutureClaimableBalanceID represents a future history claimable balance. // A FutureClaimableBalanceID is created by a ClaimableBalanceLoader and // the claimable balance id is available after calling Exec() on // the ClaimableBalanceLoader. -type FutureClaimableBalanceID struct { - id string - loader *ClaimableBalanceLoader -} - -// Value implements the database/sql/driver Valuer interface. -func (a FutureClaimableBalanceID) Value() (driver.Value, error) { - return a.loader.getNow(a.id) -} +type FutureClaimableBalanceID = future[string, HistoryClaimableBalance] // ClaimableBalanceLoader will map claimable balance ids to their internal // history ids. If there is no existing mapping for a given claimable balance id, // the ClaimableBalanceLoader will insert into the history_claimable_balances table to // establish a mapping. -type ClaimableBalanceLoader struct { - sealed bool - set set.Set[string] - ids map[string]int64 - stats LoaderStats -} +type ClaimableBalanceLoader = loader[string, HistoryClaimableBalance] // NewClaimableBalanceLoader will construct a new ClaimableBalanceLoader instance. -func NewClaimableBalanceLoader() *ClaimableBalanceLoader { +func NewClaimableBalanceLoader(concurrencyMode ConcurrencyMode) *ClaimableBalanceLoader { return &ClaimableBalanceLoader{ sealed: false, set: set.Set[string]{}, ids: map[string]int64{}, stats: LoaderStats{}, - } -} - -// GetFuture registers the given claimable balance into the loader and -// returns a FutureClaimableBalanceID which will hold the internal history id for -// the claimable balance after Exec() is called. -func (a *ClaimableBalanceLoader) GetFuture(id string) FutureClaimableBalanceID { - if a.sealed { - panic(errSealed) - } - - a.set.Add(id) - return FutureClaimableBalanceID{ - id: id, - loader: a, - } -} - -// getNow returns the internal history id for the given claimable balance. -// getNow should only be called on values which were registered by -// GetFuture() calls. Also, Exec() must be called before any getNow -// call can succeed. 
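Because each Future*ID is now an alias of the generic future type, it still satisfies driver.Valuer and can be handed to a batch insert builder before the id exists; the value only resolves once the loader's Exec has run. A rough sketch of that wiring, using the liquidity-pool operation builder exercised by the tests in this patch; the builder's flush signature is assumed from the existing batch insert builders and error handling is abbreviated:

```go
package history

import (
	"context"

	"github.com/stellar/go/support/db"
)

// linkOperationToLiquidityPool is a sketch only: a future is registered with
// the builder up front and resolves to the history id when the builder
// flushes, after lpLoader.Exec has run.
func linkOperationToLiquidityPool(ctx context.Context, session db.SessionInterface, opID int64, poolID string) error {
	q := &Q{session}
	lpLoader := NewLiquidityPoolLoader(ConcurrentDeletes)
	operationBuilder := q.NewOperationLiquidityPoolBatchInsertBuilder()

	// The future is a driver.Valuer; at this point Value() would still return
	// the "invalid loader state" error.
	if err := operationBuilder.Add(opID, lpLoader.GetFuture(poolID)); err != nil {
		return err
	}

	// Resolve (and, if needed, insert) the history_liquidity_pools row.
	if err := lpLoader.Exec(ctx, session); err != nil {
		return err
	}

	// Flush the builder; the future now resolves via GetNow. (The Exec
	// signature here is assumed to match the other batch insert builders.)
	return operationBuilder.Exec(ctx, session)
}
```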
-func (a *ClaimableBalanceLoader) getNow(id string) (int64, error) { - if !a.sealed { - return 0, fmt.Errorf(`invalid claimable balance loader state, - Exec was not called yet to properly seal and resolve %v id`, id) - } - if internalID, ok := a.ids[id]; !ok { - return 0, fmt.Errorf(`claimable balance loader id %q was not found`, id) - } else { - return internalID, nil - } -} - -func (a *ClaimableBalanceLoader) lookupKeys(ctx context.Context, q *Q, ids []string) error { - for i := 0; i < len(ids); i += loaderLookupBatchSize { - end := ordered.Min(len(ids), i+loaderLookupBatchSize) - - cbs, err := q.ClaimableBalancesByIDs(ctx, ids[i:end]) - if err != nil { - return errors.Wrap(err, "could not select claimable balances") - } - - for _, cb := range cbs { - a.ids[cb.BalanceID] = cb.InternalID - } - } - return nil -} - -// Exec will look up all the internal history ids for the claimable balances registered in the loader. -// If there are no internal ids for a given set of claimable balances, Exec will insert rows -// into the history_claimable_balances table. -func (a *ClaimableBalanceLoader) Exec(ctx context.Context, session db.SessionInterface) error { - a.sealed = true - if len(a.set) == 0 { - return nil - } - q := &Q{session} - ids := make([]string, 0, len(a.set)) - for id := range a.set { - ids = append(ids, id) - } - - if err := a.lookupKeys(ctx, q, ids); err != nil { - return err - } - a.stats.Total += len(ids) - - insert := 0 - for _, id := range ids { - if _, ok := a.ids[id]; ok { - continue - } - ids[insert] = id - insert++ - } - if insert == 0 { - return nil - } - ids = ids[:insert] - // sort entries before inserting rows to prevent deadlocks on acquiring a ShareLock - // https://github.com/stellar/go/issues/2370 - sort.Strings(ids) - - err := bulkInsert( - ctx, - q, - "history_claimable_balances", - []string{"claimable_balance_id"}, - []bulkInsertField{ - { - name: "claimable_balance_id", - dbType: "text", - objects: ids, - }, + name: "ClaimableBalanceLoader", + table: "history_claimable_balances", + columnsForKeys: func(keys []string) []columnValues { + return []columnValues{ + { + name: "claimable_balance_id", + dbType: "text", + objects: keys, + }, + } }, - ) - if err != nil { - return err + mappingFromRow: func(row HistoryClaimableBalance) (string, int64) { + return row.BalanceID, row.InternalID + }, + less: cmp.Less[string], + concurrencyMode: concurrencyMode, } - a.stats.Inserted += insert - - return a.lookupKeys(ctx, q, ids) -} - -// Stats returns the number of claimable balances registered in the loader and the number of claimable balances -// inserted into the history_claimable_balances table. 
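With the boilerplate moved into the generic loader, adding a loader for another lookup table reduces to a constructor like the one above. The following is a hypothetical sketch for an invented history_widgets table and HistoryWidget row type, written in the same shape as the ClaimableBalanceLoader constructor in this patch; it would have to live in package history because loader and columnValues are unexported, and the column names and db tags are assumptions for illustration only:

```go
package history

import (
	"cmp"

	"github.com/stellar/go/support/collections/set"
)

// HistoryWidget is an invented row type for this sketch; the db tags are
// assumed to match the hypothetical history_widgets columns.
type HistoryWidget struct {
	WidgetID   string `db:"widget_id"`
	InternalID int64  `db:"id"`
}

// FutureWidgetID and WidgetLoader mirror the aliases used by the real loaders.
type FutureWidgetID = future[string, HistoryWidget]
type WidgetLoader = loader[string, HistoryWidget]

// NewWidgetLoader fills in the generic loader's fields instead of
// re-implementing GetFuture/Exec/GetNow.
func NewWidgetLoader(concurrencyMode ConcurrencyMode) *WidgetLoader {
	return &WidgetLoader{
		sealed: false,
		set:    set.Set[string]{},
		ids:    map[string]int64{},
		stats:  LoaderStats{},
		name:   "WidgetLoader",
		table:  "history_widgets",
		columnsForKeys: func(keys []string) []columnValues {
			return []columnValues{
				{
					name:    "widget_id",
					dbType:  "text",
					objects: keys,
				},
			}
		},
		mappingFromRow: func(row HistoryWidget) (string, int64) {
			return row.WidgetID, row.InternalID
		},
		less:            cmp.Less[string],
		concurrencyMode: concurrencyMode,
	}
}
```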
-func (a *ClaimableBalanceLoader) Stats() LoaderStats { - return a.stats -} - -func (a *ClaimableBalanceLoader) Name() string { - return "ClaimableBalanceLoader" } diff --git a/services/horizon/internal/db2/history/claimable_balance_loader_test.go b/services/horizon/internal/db2/history/claimable_balance_loader_test.go index aaf91ccdcc..490d5a0f70 100644 --- a/services/horizon/internal/db2/history/claimable_balance_loader_test.go +++ b/services/horizon/internal/db2/history/claimable_balance_loader_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stellar/go/services/horizon/internal/test" + "github.com/stellar/go/support/db" "github.com/stellar/go/xdr" ) @@ -17,6 +18,12 @@ func TestClaimableBalanceLoader(t *testing.T) { test.ResetHorizonDB(t, tt.HorizonDB) session := tt.HorizonSession() + testCBLoader(t, tt, session, ConcurrentInserts) + test.ResetHorizonDB(t, tt.HorizonDB) + testCBLoader(t, tt, session, ConcurrentDeletes) +} + +func testCBLoader(t *testing.T, tt *test.T, session *db.Session, mode ConcurrencyMode) { var ids []string for i := 0; i < 100; i++ { balanceID := xdr.ClaimableBalanceId{ @@ -28,14 +35,14 @@ func TestClaimableBalanceLoader(t *testing.T) { ids = append(ids, id) } - loader := NewClaimableBalanceLoader() + loader := NewClaimableBalanceLoader(mode) var futures []FutureClaimableBalanceID for _, id := range ids { future := loader.GetFuture(id) futures = append(futures, future) _, err := future.Value() assert.Error(t, err) - assert.Contains(t, err.Error(), `invalid claimable balance loader state,`) + assert.Contains(t, err.Error(), `invalid loader state,`) duplicateFuture := loader.GetFuture(id) assert.Equal(t, future, duplicateFuture) } @@ -63,8 +70,45 @@ func TestClaimableBalanceLoader(t *testing.T) { assert.Equal(t, cb.InternalID, internalID) } - futureCb := &FutureClaimableBalanceID{id: "not-present", loader: loader} + futureCb := &FutureClaimableBalanceID{key: "not-present", loader: loader} _, err = futureCb.Value() assert.Error(t, err) assert.Contains(t, err.Error(), `was not found`) + + // check that Loader works when all the previous values are already + // present in the db and also add 10 more rows to insert + loader = NewClaimableBalanceLoader(mode) + for i := 100; i < 110; i++ { + balanceID := xdr.ClaimableBalanceId{ + Type: xdr.ClaimableBalanceIdTypeClaimableBalanceIdTypeV0, + V0: &xdr.Hash{byte(i)}, + } + var id string + id, err = xdr.MarshalHex(balanceID) + tt.Assert.NoError(err) + ids = append(ids, id) + } + + for _, id := range ids { + future := loader.GetFuture(id) + _, err = future.Value() + assert.Error(t, err) + assert.Contains(t, err.Error(), `invalid loader state,`) + } + + assert.NoError(t, loader.Exec(context.Background(), session)) + assert.Equal(t, LoaderStats{ + Total: 110, + Inserted: 10, + }, loader.Stats()) + + for _, id := range ids { + internalID, err := loader.GetNow(id) + assert.NoError(t, err) + var cb HistoryClaimableBalance + cb, err = q.ClaimableBalanceByID(context.Background(), id) + assert.NoError(t, err) + assert.Equal(t, cb.BalanceID, id) + assert.Equal(t, cb.InternalID, internalID) + } } diff --git a/services/horizon/internal/db2/history/effect_batch_insert_builder_test.go b/services/horizon/internal/db2/history/effect_batch_insert_builder_test.go index e1ac998953..e917983b8f 100644 --- a/services/horizon/internal/db2/history/effect_batch_insert_builder_test.go +++ b/services/horizon/internal/db2/history/effect_batch_insert_builder_test.go @@ -20,7 +20,7 @@ func TestAddEffect(t *testing.T) { 
address := "GAQAA5L65LSYH7CQ3VTJ7F3HHLGCL3DSLAR2Y47263D56MNNGHSQSTVY" muxedAddres := "MAQAA5L65LSYH7CQ3VTJ7F3HHLGCL3DSLAR2Y47263D56MNNGHSQSAAAAAAAAAAE2LP26" - accountLoader := NewAccountLoader() + accountLoader := NewAccountLoader(ConcurrentInserts) builder := q.NewEffectBatchInsertBuilder() sequence := int32(56) diff --git a/services/horizon/internal/db2/history/effect_test.go b/services/horizon/internal/db2/history/effect_test.go index 19af0ceff8..ba59cf3fd4 100644 --- a/services/horizon/internal/db2/history/effect_test.go +++ b/services/horizon/internal/db2/history/effect_test.go @@ -23,7 +23,7 @@ func TestEffectsForLiquidityPool(t *testing.T) { // Insert Effect address := "GAQAA5L65LSYH7CQ3VTJ7F3HHLGCL3DSLAR2Y47263D56MNNGHSQSTVY" muxedAddres := "MAQAA5L65LSYH7CQ3VTJ7F3HHLGCL3DSLAR2Y47263D56MNNGHSQSAAAAAAAAAAE2LP26" - accountLoader := NewAccountLoader() + accountLoader := NewAccountLoader(ConcurrentInserts) builder := q.NewEffectBatchInsertBuilder() sequence := int32(56) @@ -47,7 +47,7 @@ func TestEffectsForLiquidityPool(t *testing.T) { // Insert Liquidity Pool history liquidityPoolID := "abcde" - lpLoader := NewLiquidityPoolLoader() + lpLoader := NewLiquidityPoolLoader(ConcurrentInserts) operationBuilder := q.NewOperationLiquidityPoolBatchInsertBuilder() tt.Assert.NoError(operationBuilder.Add(opID, lpLoader.GetFuture(liquidityPoolID))) @@ -78,7 +78,7 @@ func TestEffectsForTrustlinesSponsorshipEmptyAssetType(t *testing.T) { address := "GAQAA5L65LSYH7CQ3VTJ7F3HHLGCL3DSLAR2Y47263D56MNNGHSQSTVY" muxedAddres := "MAQAA5L65LSYH7CQ3VTJ7F3HHLGCL3DSLAR2Y47263D56MNNGHSQSAAAAAAAAAAE2LP26" - accountLoader := NewAccountLoader() + accountLoader := NewAccountLoader(ConcurrentInserts) builder := q.NewEffectBatchInsertBuilder() sequence := int32(56) diff --git a/services/horizon/internal/db2/history/fee_bump_scenario.go b/services/horizon/internal/db2/history/fee_bump_scenario.go index da6563c732..e161d686d1 100644 --- a/services/horizon/internal/db2/history/fee_bump_scenario.go +++ b/services/horizon/internal/db2/history/fee_bump_scenario.go @@ -288,7 +288,7 @@ func FeeBumpScenario(tt *test.T, q *Q, successful bool) FeeBumpFixture { details, err = json.Marshal(map[string]interface{}{"new_seq": 98}) tt.Assert.NoError(err) - accountLoader := NewAccountLoader() + accountLoader := NewAccountLoader(ConcurrentInserts) err = effectBuilder.Add( accountLoader.GetFuture(account.Address()), diff --git a/services/horizon/internal/db2/history/key_value.go b/services/horizon/internal/db2/history/key_value.go index 3d23451937..9fee9513c2 100644 --- a/services/horizon/internal/db2/history/key_value.go +++ b/services/horizon/internal/db2/history/key_value.go @@ -5,7 +5,6 @@ import ( "database/sql" "fmt" "strconv" - "strings" sq "github.com/Masterminds/squirrel" @@ -207,41 +206,26 @@ func (q *Q) getValueFromStore(ctx context.Context, key string, forUpdate bool) ( return value, nil } -type KeyValuePair struct { - Key string `db:"key"` - Value string `db:"value"` -} - -func (q *Q) getLookupTableReapOffsets(ctx context.Context) (map[string]int64, error) { - keys := make([]string, 0, len(historyLookupTables)) - for table := range historyLookupTables { - keys = append(keys, table+lookupTableReapOffsetSuffix) - } - offsets := map[string]int64{} - var pairs []KeyValuePair - query := sq.Select("key", "value"). +func (q *Q) getLookupTableReapOffset(ctx context.Context, table string) (int64, error) { + query := sq.Select("value"). From("key_value_store"). 
Where(map[string]interface{}{ - "key": keys, + "key": table + lookupTableReapOffsetSuffix, }) - err := q.Select(ctx, &pairs, query) + var text string + err := q.Get(ctx, &text, query) if err != nil { - return nil, err - } - for _, pair := range pairs { - table := strings.TrimSuffix(pair.Key, lookupTableReapOffsetSuffix) - if _, ok := historyLookupTables[table]; !ok { - return nil, fmt.Errorf("invalid key: %s", pair.Key) - } - - var offset int64 - offset, err = strconv.ParseInt(pair.Value, 10, 64) - if err != nil { - return nil, fmt.Errorf("invalid offset: %s", pair.Value) + if errors.Cause(err) == sql.ErrNoRows { + return 0, nil } - offsets[table] = offset + return 0, err + } + var offset int64 + offset, err = strconv.ParseInt(text, 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid offset: %s for table %s", text, table) } - return offsets, err + return offset, nil } func (q *Q) updateLookupTableReapOffset(ctx context.Context, table string, offset int64) error { diff --git a/services/horizon/internal/db2/history/liquidity_pool_loader.go b/services/horizon/internal/db2/history/liquidity_pool_loader.go index d619fa3bb4..5da2a7b6fd 100644 --- a/services/horizon/internal/db2/history/liquidity_pool_loader.go +++ b/services/horizon/internal/db2/history/liquidity_pool_loader.go @@ -1,163 +1,47 @@ package history import ( - "context" - "database/sql/driver" - "fmt" - "sort" + "cmp" "github.com/stellar/go/support/collections/set" - "github.com/stellar/go/support/db" - "github.com/stellar/go/support/errors" - "github.com/stellar/go/support/ordered" ) // FutureLiquidityPoolID represents a future history liquidity pool. // A FutureLiquidityPoolID is created by an LiquidityPoolLoader and // the liquidity pool id is available after calling Exec() on // the LiquidityPoolLoader. -type FutureLiquidityPoolID struct { - id string - loader *LiquidityPoolLoader -} - -// Value implements the database/sql/driver Valuer interface. -func (a FutureLiquidityPoolID) Value() (driver.Value, error) { - return a.loader.GetNow(a.id) -} +type FutureLiquidityPoolID = future[string, HistoryLiquidityPool] // LiquidityPoolLoader will map liquidity pools to their internal // history ids. If there is no existing mapping for a given liquidity pool, // the LiquidityPoolLoader will insert into the history_liquidity_pools table to // establish a mapping. -type LiquidityPoolLoader struct { - sealed bool - set set.Set[string] - ids map[string]int64 - stats LoaderStats -} +type LiquidityPoolLoader = loader[string, HistoryLiquidityPool] // NewLiquidityPoolLoader will construct a new LiquidityPoolLoader instance. -func NewLiquidityPoolLoader() *LiquidityPoolLoader { +func NewLiquidityPoolLoader(concurrencyMode ConcurrencyMode) *LiquidityPoolLoader { return &LiquidityPoolLoader{ sealed: false, set: set.Set[string]{}, ids: map[string]int64{}, stats: LoaderStats{}, - } -} - -// GetFuture registers the given liquidity pool into the loader and -// returns a FutureLiquidityPoolID which will hold the internal history id for -// the liquidity pool after Exec() is called. -func (a *LiquidityPoolLoader) GetFuture(id string) FutureLiquidityPoolID { - if a.sealed { - panic(errSealed) - } - - a.set.Add(id) - return FutureLiquidityPoolID{ - id: id, - loader: a, - } -} - -// GetNow returns the internal history id for the given liquidity pool. -// GetNow should only be called on values which were registered by -// GetFuture() calls. Also, Exec() must be called before any GetNow -// call can succeed. 
-func (a *LiquidityPoolLoader) GetNow(id string) (int64, error) { - if !a.sealed { - return 0, fmt.Errorf(`invalid liquidity pool loader state, - Exec was not called yet to properly seal and resolve %v id`, id) - } - if internalID, ok := a.ids[id]; !ok { - return 0, fmt.Errorf(`liquidity pool loader id %q was not found`, id) - } else { - return internalID, nil - } -} - -func (a *LiquidityPoolLoader) lookupKeys(ctx context.Context, q *Q, ids []string) error { - for i := 0; i < len(ids); i += loaderLookupBatchSize { - end := ordered.Min(len(ids), i+loaderLookupBatchSize) - - lps, err := q.LiquidityPoolsByIDs(ctx, ids[i:end]) - if err != nil { - return errors.Wrap(err, "could not select accounts") - } - - for _, lp := range lps { - a.ids[lp.PoolID] = lp.InternalID - } - } - return nil -} - -// Exec will look up all the internal history ids for the liquidity pools registered in the loader. -// If there are no internal history ids for a given set of liquidity pools, Exec will insert rows -// into the history_liquidity_pools table. -func (a *LiquidityPoolLoader) Exec(ctx context.Context, session db.SessionInterface) error { - a.sealed = true - if len(a.set) == 0 { - return nil - } - q := &Q{session} - ids := make([]string, 0, len(a.set)) - for id := range a.set { - ids = append(ids, id) - } - - if err := a.lookupKeys(ctx, q, ids); err != nil { - return err - } - a.stats.Total += len(ids) - - insert := 0 - for _, id := range ids { - if _, ok := a.ids[id]; ok { - continue - } - ids[insert] = id - insert++ - } - if insert == 0 { - return nil - } - ids = ids[:insert] - // sort entries before inserting rows to prevent deadlocks on acquiring a ShareLock - // https://github.com/stellar/go/issues/2370 - sort.Strings(ids) - - err := bulkInsert( - ctx, - q, - "history_liquidity_pools", - []string{"liquidity_pool_id"}, - []bulkInsertField{ - { - name: "liquidity_pool_id", - dbType: "text", - objects: ids, - }, + name: "LiquidityPoolLoader", + table: "history_liquidity_pools", + columnsForKeys: func(keys []string) []columnValues { + return []columnValues{ + { + name: "liquidity_pool_id", + dbType: "text", + objects: keys, + }, + } }, - ) - if err != nil { - return err + mappingFromRow: func(row HistoryLiquidityPool) (string, int64) { + return row.PoolID, row.InternalID + }, + less: cmp.Less[string], + concurrencyMode: concurrencyMode, } - a.stats.Inserted += insert - - return a.lookupKeys(ctx, q, ids) -} - -// Stats returns the number of liquidity pools registered in the loader and the number of liquidity pools -// inserted into the history_liquidity_pools table. 
-func (a *LiquidityPoolLoader) Stats() LoaderStats { - return a.stats -} - -func (a *LiquidityPoolLoader) Name() string { - return "LiquidityPoolLoader" } // LiquidityPoolLoaderStub is a stub wrapper around LiquidityPoolLoader which allows @@ -168,7 +52,7 @@ type LiquidityPoolLoaderStub struct { // NewLiquidityPoolLoaderStub returns a new LiquidityPoolLoader instance func NewLiquidityPoolLoaderStub() LiquidityPoolLoaderStub { - return LiquidityPoolLoaderStub{Loader: NewLiquidityPoolLoader()} + return LiquidityPoolLoaderStub{Loader: NewLiquidityPoolLoader(ConcurrentInserts)} } // Insert updates the wrapped LiquidityPoolLoader so that the given liquidity pool diff --git a/services/horizon/internal/db2/history/liquidity_pool_loader_test.go b/services/horizon/internal/db2/history/liquidity_pool_loader_test.go index 25ca80826c..c7a1282760 100644 --- a/services/horizon/internal/db2/history/liquidity_pool_loader_test.go +++ b/services/horizon/internal/db2/history/liquidity_pool_loader_test.go @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stellar/go/services/horizon/internal/test" + "github.com/stellar/go/support/db" "github.com/stellar/go/xdr" ) @@ -16,6 +17,12 @@ func TestLiquidityPoolLoader(t *testing.T) { test.ResetHorizonDB(t, tt.HorizonDB) session := tt.HorizonSession() + testLPLoader(t, tt, session, ConcurrentInserts) + test.ResetHorizonDB(t, tt.HorizonDB) + testLPLoader(t, tt, session, ConcurrentDeletes) +} + +func testLPLoader(t *testing.T, tt *test.T, session *db.Session, mode ConcurrencyMode) { var ids []string for i := 0; i < 100; i++ { poolID := xdr.PoolId{byte(i)} @@ -24,12 +31,12 @@ func TestLiquidityPoolLoader(t *testing.T) { ids = append(ids, id) } - loader := NewLiquidityPoolLoader() + loader := NewLiquidityPoolLoader(mode) for _, id := range ids { future := loader.GetFuture(id) _, err := future.Value() assert.Error(t, err) - assert.Contains(t, err.Error(), `invalid liquidity pool loader state,`) + assert.Contains(t, err.Error(), `invalid loader state,`) duplicateFuture := loader.GetFuture(id) assert.Equal(t, future, duplicateFuture) } @@ -59,4 +66,39 @@ func TestLiquidityPoolLoader(t *testing.T) { _, err = loader.GetNow("not present") assert.Error(t, err) assert.Contains(t, err.Error(), `was not found`) + + // check that Loader works when all the previous values are already + // present in the db and also add 10 more rows to insert + loader = NewLiquidityPoolLoader(mode) + for i := 100; i < 110; i++ { + poolID := xdr.PoolId{byte(i)} + var id string + id, err = xdr.MarshalHex(poolID) + tt.Assert.NoError(err) + ids = append(ids, id) + } + + for _, id := range ids { + future := loader.GetFuture(id) + _, err = future.Value() + assert.Error(t, err) + assert.Contains(t, err.Error(), `invalid loader state,`) + } + + assert.NoError(t, loader.Exec(context.Background(), session)) + assert.Equal(t, LoaderStats{ + Total: 110, + Inserted: 10, + }, loader.Stats()) + + for _, id := range ids { + var internalID int64 + internalID, err = loader.GetNow(id) + assert.NoError(t, err) + var lp HistoryLiquidityPool + lp, err = q.LiquidityPoolByID(context.Background(), id) + assert.NoError(t, err) + assert.Equal(t, lp.PoolID, id) + assert.Equal(t, lp.InternalID, internalID) + } } diff --git a/services/horizon/internal/db2/history/loader.go b/services/horizon/internal/db2/history/loader.go new file mode 100644 index 0000000000..dc2236accb --- /dev/null +++ b/services/horizon/internal/db2/history/loader.go @@ -0,0 +1,365 @@ +package history + +import ( + "context" + 
"database/sql/driver" + "fmt" + "sort" + "strings" + + "github.com/lib/pq" + + "github.com/stellar/go/support/collections/set" + "github.com/stellar/go/support/db" +) + +var errSealed = fmt.Errorf("cannot register more entries to Loader after calling Exec()") + +// ConcurrencyMode is used to configure the level of thread-safety for a loader +type ConcurrencyMode int + +func (cm ConcurrencyMode) String() string { + switch cm { + case ConcurrentInserts: + return "ConcurrentInserts" + case ConcurrentDeletes: + return "ConcurrentDeletes" + default: + return "unknown" + } +} + +const ( + _ ConcurrencyMode = iota + // ConcurrentInserts configures the loader to maintain safety when there are multiple loaders + // inserting into the same table concurrently. This ConcurrencyMode is suitable for parallel reingestion. + // Note while ConcurrentInserts is enabled it is not safe to have deletes occurring concurrently on the + // same table. + ConcurrentInserts + // ConcurrentDeletes configures the loader to maintain safety when there is another thread which is invoking + // reapLookupTable() to delete rows from the same table concurrently. This ConcurrencyMode is suitable for + // live ingestion when reaping of lookup tables is enabled. + // Note while ConcurrentDeletes is enabled it is not safe to have multiple threads inserting concurrently to the + // same table. + ConcurrentDeletes +) + +// LoaderStats describes the result of executing a history lookup id Loader +type LoaderStats struct { + // Total is the number of elements registered to the Loader + Total int + // Inserted is the number of elements inserted into the lookup table + Inserted int +} + +type loader[K comparable, T any] struct { + sealed bool + set set.Set[K] + ids map[K]int64 + stats LoaderStats + name string + table string + columnsForKeys func([]K) []columnValues + mappingFromRow func(T) (K, int64) + less func(K, K) bool + concurrencyMode ConcurrencyMode +} + +type future[K comparable, T any] struct { + key K + loader *loader[K, T] +} + +// Value implements the database/sql/driver Valuer interface. +func (f future[K, T]) Value() (driver.Value, error) { + return f.loader.GetNow(f.key) +} + +// GetFuture registers the given key into the Loader and +// returns a future which will hold the history id for +// the key after Exec() is called. +func (l *loader[K, T]) GetFuture(key K) future[K, T] { + if l.sealed { + panic(errSealed) + } + + l.set.Add(key) + return future[K, T]{ + key: key, + loader: l, + } +} + +// GetNow returns the history id for the given key. +// GetNow should only be called on values which were registered by +// GetFuture() calls. Also, Exec() must be called before any GetNow +// call can succeed. +func (l *loader[K, T]) GetNow(key K) (int64, error) { + if !l.sealed { + return 0, fmt.Errorf(`invalid loader state, + Exec was not called yet to properly seal and resolve %v id`, key) + } + if internalID, ok := l.ids[key]; !ok { + return 0, fmt.Errorf(`loader key %v was not found`, key) + } else { + return internalID, nil + } +} + +// Exec will look up all the history ids for the keys registered in the Loader. +// If there are no history ids for a given set of keys, Exec will insert rows +// into the corresponding history table to establish a mapping between each key and its history id. 
+func (l *loader[K, T]) Exec(ctx context.Context, session db.SessionInterface) error { + l.sealed = true + if len(l.set) == 0 { + return nil + } + q := &Q{session} + keys := make([]K, 0, len(l.set)) + for key := range l.set { + keys = append(keys, key) + } + // sort entries before inserting rows to prevent deadlocks on acquiring a ShareLock + // https://github.com/stellar/go/issues/2370 + sort.Slice(keys, func(i, j int) bool { + return l.less(keys[i], keys[j]) + }) + + if l.concurrencyMode == ConcurrentInserts { + // if there are other ingestion transactions running concurrently, + // we need to first insert the records (with a ON CONFLICT DO NOTHING + // clause). Then, we can query for the remaining records. + // This order (insert first and then query) is important because + // if multiple concurrent transactions try to insert the same record + // only one of them will succeed and the other transactions will omit + // the record from the RETURNING set. + if count, err := l.insert(ctx, q, keys); err != nil { + return err + } else { + l.stats.Total += count + l.stats.Inserted += count + } + + if count, err := l.query(ctx, q, keys, false); err != nil { + return err + } else { + l.stats.Total += count + } + } else if l.concurrencyMode == ConcurrentDeletes { + // if the lookup table reaping transaction is running concurrently, + // we need to lock the rows from the lookup table to ensure that + // the reaper cannot run until after the ingestion transaction has + // been committed. + if count, err := l.query(ctx, q, keys, true); err != nil { + return err + } else { + l.stats.Total += count + } + + // insert whatever records were not found from l.query() + if count, err := l.insert(ctx, q, keys); err != nil { + return err + } else { + l.stats.Total += count + l.stats.Inserted += count + } + } else { + return fmt.Errorf("concurrency mode %v is invalid", l.concurrencyMode) + } + + return nil +} + +// Stats returns the number of addresses registered in the Loader and the number of rows +// inserted into the history table. 
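To make the two Exec branches concrete, the bulkInsert and bulkGet helpers defined further down generate SQL of roughly the following shape for a single-column loader such as ClaimableBalanceLoader; the text is reproduced approximately, and whitespace differs from the generated queries:

```go
const (
	// ConcurrentInserts path: insert first with ON CONFLICT DO NOTHING and
	// rely on RETURNING, then query separately for rows that already existed.
	approxBulkInsertSQL = `
WITH rows AS
  (SELECT unnest(?::text[]) /* claimable_balance_id */)
INSERT INTO history_claimable_balances
  (claimable_balance_id)
SELECT * FROM rows
ON CONFLICT (claimable_balance_id) DO NOTHING
RETURNING *`

	// ConcurrentDeletes path: lock existing rows with FOR KEY SHARE so the
	// reaper blocks until this transaction commits, then insert the remainder.
	approxBulkGetSQL = `
SELECT * FROM history_claimable_balances WHERE (claimable_balance_id) IN
  (SELECT unnest(?::text[]) /* claimable_balance_id */)
ORDER BY id ASC FOR KEY SHARE`
)
```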
+func (l *loader[K, T]) Stats() LoaderStats { + return l.stats +} + +func (l *loader[K, T]) Name() string { + return l.name +} + +func (l *loader[K, T]) filter(keys []K) []K { + if len(l.ids) == 0 { + return keys + } + + remaining := make([]K, 0, len(keys)) + for _, key := range keys { + if _, ok := l.ids[key]; ok { + continue + } + remaining = append(remaining, key) + } + return remaining +} + +func (l *loader[K, T]) updateMap(rows []T) { + for _, row := range rows { + key, id := l.mappingFromRow(row) + l.ids[key] = id + } +} + +func (l *loader[K, T]) insert(ctx context.Context, q *Q, keys []K) (int, error) { + keys = l.filter(keys) + if len(keys) == 0 { + return 0, nil + } + + var rows []T + err := bulkInsert( + ctx, + q, + l.table, + l.columnsForKeys(keys), + &rows, + ) + if err != nil { + return 0, err + } + + l.updateMap(rows) + return len(rows), nil +} + +func (l *loader[K, T]) query(ctx context.Context, q *Q, keys []K, lockRows bool) (int, error) { + keys = l.filter(keys) + if len(keys) == 0 { + return 0, nil + } + var suffix string + if lockRows { + suffix = "ORDER BY id ASC FOR KEY SHARE" + } + + var rows []T + err := bulkGet( + ctx, + q, + l.table, + l.columnsForKeys(keys), + &rows, + suffix, + ) + if err != nil { + return 0, err + } + + l.updateMap(rows) + return len(rows), nil +} + +type columnValues struct { + name string + dbType string + objects []string +} + +func bulkInsert(ctx context.Context, q *Q, table string, fields []columnValues, response interface{}) error { + unnestPart := make([]string, 0, len(fields)) + insertFieldsPart := make([]string, 0, len(fields)) + pqArrays := make([]interface{}, 0, len(fields)) + + // In the code below we are building the bulk insert query which looks like: + // + // WITH rows AS + // (SELECT + // /* unnestPart */ + // unnest(?::type1[]), /* field1 */ + // unnest(?::type2[]), /* field2 */ + // ... + // ) + // INSERT INTO table ( + // /* insertFieldsPart */ + // field1, + // field2, + // ... + // ) + // SELECT * FROM rows ON CONFLICT (field1, field2, ...) DO NOTHING RETURNING * + // + // Using unnest allows to get around the maximum limit of 65,535 query parameters, + // see https://www.postgresql.org/docs/12/limits.html and + // https://klotzandrew.com/blog/postgres-passing-65535-parameter-limit/ + // + // Without using unnest we would have to use multiple insert statements to insert + // all the rows for large datasets. + for _, field := range fields { + unnestPart = append( + unnestPart, + fmt.Sprintf("unnest(?::%s[]) /* %s */", field.dbType, field.name), + ) + insertFieldsPart = append( + insertFieldsPart, + field.name, + ) + pqArrays = append( + pqArrays, + pq.Array(field.objects), + ) + } + columns := strings.Join(insertFieldsPart, ",") + + sql := ` + WITH rows AS + (SELECT ` + strings.Join(unnestPart, ",") + `) + INSERT INTO ` + table + ` + (` + columns + `) + SELECT * FROM rows + ON CONFLICT (` + columns + `) DO NOTHING + RETURNING *` + + return q.SelectRaw( + ctx, + response, + sql, + pqArrays..., + ) +} + +func bulkGet(ctx context.Context, q *Q, table string, fields []columnValues, response interface{}, suffix string) error { + unnestPart := make([]string, 0, len(fields)) + columns := make([]string, 0, len(fields)) + pqArrays := make([]interface{}, 0, len(fields)) + + // In the code below we are building the bulk get query which looks like: + // + // SELECT * FROM table WHERE (field1, field2, ...) IN + // (SELECT + // /* unnestPart */ + // unnest(?::type1[]), /* field1 */ + // unnest(?::type2[]), /* field2 */ + // ... 
+ // ) + // + // Using unnest allows to get around the maximum limit of 65,535 query parameters, + // see https://www.postgresql.org/docs/12/limits.html and + // https://klotzandrew.com/blog/postgres-passing-65535-parameter-limit/ + // + // Without using unnest we would have to use multiple select statements to obtain + // all the rows for large datasets. + for _, field := range fields { + unnestPart = append( + unnestPart, + fmt.Sprintf("unnest(?::%s[]) /* %s */", field.dbType, field.name), + ) + columns = append( + columns, + field.name, + ) + pqArrays = append( + pqArrays, + pq.Array(field.objects), + ) + } + sql := `SELECT * FROM ` + table + ` WHERE (` + strings.Join(columns, ",") + `) IN + (SELECT ` + strings.Join(unnestPart, ",") + `) ` + suffix + + return q.SelectRaw( + ctx, + response, + sql, + pqArrays..., + ) +} diff --git a/services/horizon/internal/db2/history/loader_concurrency_test.go b/services/horizon/internal/db2/history/loader_concurrency_test.go new file mode 100644 index 0000000000..e87d901bd8 --- /dev/null +++ b/services/horizon/internal/db2/history/loader_concurrency_test.go @@ -0,0 +1,197 @@ +package history + +import ( + "context" + "database/sql" + "fmt" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/stellar/go/keypair" + "github.com/stellar/go/services/horizon/internal/test" +) + +func TestLoaderConcurrentInserts(t *testing.T) { + tt := test.Start(t) + defer tt.Finish() + test.ResetHorizonDB(t, tt.HorizonDB) + s1 := tt.HorizonSession() + s2 := s1.Clone() + + for _, testCase := range []struct { + mode ConcurrencyMode + pass bool + }{ + {ConcurrentInserts, true}, + {ConcurrentDeletes, false}, + } { + t.Run(fmt.Sprintf("%v", testCase.mode), func(t *testing.T) { + var addresses []string + for i := 0; i < 10; i++ { + addresses = append(addresses, keypair.MustRandom().Address()) + } + + l1 := NewAccountLoader(testCase.mode) + for _, address := range addresses { + l1.GetFuture(address) + } + + for i := 0; i < 5; i++ { + addresses = append(addresses, keypair.MustRandom().Address()) + } + + l2 := NewAccountLoader(testCase.mode) + for _, address := range addresses { + l2.GetFuture(address) + } + + assert.NoError(t, s1.Begin(context.Background())) + assert.NoError(t, l1.Exec(context.Background(), s1)) + + assert.NoError(t, s2.Begin(context.Background())) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + <-time.After(time.Second * 3) + assert.NoError(t, s1.Commit()) + }() + // l2.Exec(context.Background(), s2) will block until s1 + // is committed because s1 and s2 both attempt to insert common + // accounts and, since s1 executed first, s2 must wait until + // s1 terminates. 
+ assert.NoError(t, l2.Exec(context.Background(), s2)) + assert.NoError(t, s2.Commit()) + wg.Wait() + + assert.Equal(t, LoaderStats{ + Total: 10, + Inserted: 10, + }, l1.Stats()) + + if testCase.pass { + assert.Equal(t, LoaderStats{ + Total: 15, + Inserted: 5, + }, l2.Stats()) + } else { + assert.Equal(t, LoaderStats{ + Total: 5, + Inserted: 5, + }, l2.Stats()) + return + } + + q := &Q{s1} + for _, address := range addresses[:10] { + l1Id, err := l1.GetNow(address) + assert.NoError(t, err) + + l2Id, err := l2.GetNow(address) + assert.NoError(t, err) + assert.Equal(t, l1Id, l2Id) + + var account Account + assert.NoError(t, q.AccountByAddress(context.Background(), &account, address)) + assert.Equal(t, account.ID, l1Id) + assert.Equal(t, account.Address, address) + } + + for _, address := range addresses[10:] { + l2Id, err := l2.GetNow(address) + assert.NoError(t, err) + + _, err = l1.GetNow(address) + assert.ErrorContains(t, err, "was not found") + + var account Account + assert.NoError(t, q.AccountByAddress(context.Background(), &account, address)) + assert.Equal(t, account.ID, l2Id) + assert.Equal(t, account.Address, address) + } + }) + } +} + +func TestLoaderConcurrentDeletes(t *testing.T) { + tt := test.Start(t) + defer tt.Finish() + test.ResetHorizonDB(t, tt.HorizonDB) + s1 := tt.HorizonSession() + s2 := s1.Clone() + + for _, testCase := range []struct { + mode ConcurrencyMode + pass bool + }{ + {ConcurrentInserts, false}, + {ConcurrentDeletes, true}, + } { + t.Run(fmt.Sprintf("%v", testCase.mode), func(t *testing.T) { + var addresses []string + for i := 0; i < 10; i++ { + addresses = append(addresses, keypair.MustRandom().Address()) + } + + loader := NewAccountLoader(testCase.mode) + for _, address := range addresses { + loader.GetFuture(address) + } + assert.NoError(t, loader.Exec(context.Background(), s1)) + + var ids []int64 + for _, address := range addresses { + id, err := loader.GetNow(address) + assert.NoError(t, err) + ids = append(ids, id) + } + + loader = NewAccountLoader(testCase.mode) + for _, address := range addresses { + loader.GetFuture(address) + } + + assert.NoError(t, s1.Begin(context.Background())) + assert.NoError(t, loader.Exec(context.Background(), s1)) + + assert.NoError(t, s2.Begin(context.Background())) + q2 := &Q{s2} + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + <-time.After(time.Second * 3) + + q1 := &Q{s1} + for _, address := range addresses { + id, err := loader.GetNow(address) + assert.NoError(t, err) + + var account Account + err = q1.AccountByAddress(context.Background(), &account, address) + if testCase.pass { + assert.NoError(t, err) + assert.Equal(t, account.ID, id) + assert.Equal(t, account.Address, address) + } else { + assert.ErrorContains(t, err, sql.ErrNoRows.Error()) + } + } + assert.NoError(t, s1.Commit()) + }() + + // the reaper should block until s1 has been committed because s1 has locked + // the orphaned rows + deletedCount, err := q2.reapLookupTable(context.Background(), "history_accounts", ids, 1000) + assert.NoError(t, err) + assert.Equal(t, int64(len(addresses)), deletedCount) + assert.NoError(t, s2.Commit()) + + wg.Wait() + }) + } +} diff --git a/services/horizon/internal/db2/history/main.go b/services/horizon/internal/db2/history/main.go index e9d8ffb185..59d828d091 100644 --- a/services/horizon/internal/db2/history/main.go +++ b/services/horizon/internal/db2/history/main.go @@ -9,6 +9,7 @@ import ( "database/sql/driver" "encoding/json" "fmt" + "strconv" "strings" "sync" "time" @@ -282,7 +283,8 @@ type 
IngestionQ interface { NewTradeBatchInsertBuilder() TradeBatchInsertBuilder RebuildTradeAggregationTimes(ctx context.Context, from, to strtime.Millis, roundingSlippageFilter int) error RebuildTradeAggregationBuckets(ctx context.Context, fromLedger, toLedger uint32, roundingSlippageFilter int) error - ReapLookupTables(ctx context.Context, batchSize int) (map[string]LookupTableReapResult, error) + ReapLookupTable(ctx context.Context, table string, ids []int64, newOffset int64) (int64, error) + FindLookupTableRowsToReap(ctx context.Context, table string, batchSize int) ([]int64, int64, error) CreateAssets(ctx context.Context, assets []xdr.Asset, batchSize int) (map[string]Asset, error) QTransactions QTrustLines @@ -307,6 +309,7 @@ type IngestionQ interface { GetNextLedgerSequence(context.Context, uint32) (uint32, bool, error) TryStateVerificationLock(context.Context) (bool, error) TryReaperLock(context.Context) (bool, error) + TryLookupTableReaperLock(ctx context.Context) (bool, error) ElderLedger(context.Context, interface{}) error } @@ -977,63 +980,70 @@ type LookupTableReapResult struct { Duration time.Duration } -// ReapLookupTables removes rows from lookup tables like history_claimable_balances -// which aren't used (orphaned), i.e. history entries for them were reaped. -// This method must be executed inside ingestion transaction. Otherwise it may -// create invalid state in lookup and history tables. -func (q *Q) ReapLookupTables(ctx context.Context, batchSize int) ( - map[string]LookupTableReapResult, - error, -) { - if q.GetTx() == nil { - return nil, errors.New("cannot be called outside of an ingestion transaction") +func (q *Q) FindLookupTableRowsToReap(ctx context.Context, table string, batchSize int) ([]int64, int64, error) { + offset, err := q.getLookupTableReapOffset(ctx, table) + if err != nil { + return nil, 0, fmt.Errorf("could not obtain offsets: %w", err) } - offsets, err := q.getLookupTableReapOffsets(ctx) + // Find new offset before removing the rows + var newOffset int64 + err = q.GetRaw( + ctx, + &newOffset, + fmt.Sprintf( + "SELECT id FROM %s WHERE id >= %d ORDER BY id ASC LIMIT 1 OFFSET %d", + table, offset, batchSize, + ), + ) if err != nil { - return nil, fmt.Errorf("could not obtain offsets: %w", err) + if q.NoRows(err) { + newOffset = 0 + } else { + return nil, 0, err + } } - results := map[string]LookupTableReapResult{} - for table, historyTables := range historyLookupTables { - startTime := time.Now() - query := constructReapLookupTablesQuery(table, historyTables, batchSize, offsets[table]) + var ids []int64 + err = q.SelectRaw(ctx, &ids, constructFindReapLookupTablesQuery(table, batchSize, offset)) + if err != nil { + return nil, 0, fmt.Errorf("could not query orphaned rows: %w", err) + } - // Find new offset before removing the rows - var newOffset int64 - err := q.GetRaw(ctx, &newOffset, fmt.Sprintf("SELECT id FROM %s where id >= %d limit 1 offset %d", table, offsets[table], batchSize)) - if err != nil { - if q.NoRows(err) { - newOffset = 0 - } else { - return nil, err - } - } + return ids, newOffset, nil +} - res, err := q.ExecRaw( - context.WithValue(ctx, &db.QueryTypeContextKey, db.DeleteQueryType), - query, - ) - if err != nil { - return nil, errors.Wrapf(err, "error running query: %s", query) - } +func (q *Q) ReapLookupTable(ctx context.Context, table string, ids []int64, newOffset int64) (int64, error) { + if err := q.Begin(ctx); err != nil { + return 0, fmt.Errorf("could not start transaction: %w", err) + } + defer q.Rollback() - if err = 
q.updateLookupTableReapOffset(ctx, table, newOffset); err != nil { - return nil, fmt.Errorf("error updating offset: %w", err) - } + rowsDeleted, err := q.reapLookupTable(ctx, table, ids, newOffset) + if err != nil { + return 0, err + } - rows, err := res.RowsAffected() - if err != nil { - return nil, errors.Wrapf(err, "error running RowsAffected after query: %s", query) - } + if err := q.Commit(); err != nil { + return 0, fmt.Errorf("could not commit transaction: %w", err) + } + return rowsDeleted, nil +} + +func (q *Q) reapLookupTable(ctx context.Context, table string, ids []int64, newOffset int64) (int64, error) { + if err := q.updateLookupTableReapOffset(ctx, table, newOffset); err != nil { + return 0, fmt.Errorf("error updating offset for table %s: %w ", table, err) + } - results[table] = LookupTableReapResult{ - Offset: newOffset, - RowsDeleted: rows, - Duration: time.Since(startTime), + var rowsDeleted int64 + if len(ids) > 0 { + var err error + rowsDeleted, err = q.deleteLookupTableRows(ctx, table, ids) + if err != nil { + return 0, fmt.Errorf("could not delete orphaned rows: %w", err) } } - return results, nil + return rowsDeleted, nil } var historyLookupTables = map[string][]tableObjectFieldPair{ @@ -1100,54 +1110,100 @@ var historyLookupTables = map[string][]tableObjectFieldPair{ }, } -// constructReapLookupTablesQuery creates a query like (using history_claimable_balances +func (q *Q) deleteLookupTableRows(ctx context.Context, table string, ids []int64) (int64, error) { + deleteQuery := constructDeleteLookupTableRowsQuery(table, ids) + result, err := q.ExecRaw( + context.WithValue(ctx, &db.QueryTypeContextKey, db.DeleteQueryType), + deleteQuery, + ) + if err != nil { + return 0, fmt.Errorf("error running query %s : %w", deleteQuery, err) + } + var deletedCount int64 + deletedCount, err = result.RowsAffected() + if err != nil { + return 0, fmt.Errorf("error getting deleted count: %w", err) + } + return deletedCount, nil +} + +// constructDeleteLookupTableRowsQuery creates a query like (using history_claimable_balances // as an example): // -// delete from history_claimable_balances where id in ( -// -// WITH ha_batch AS ( -// SELECT id -// FROM history_claimable_balances -// WHERE id >= 1000 -// ORDER BY id limit 1000 -// ) SELECT e1.id as id FROM ha_batch e1 +// WITH ha_batch AS ( +// SELECT id +// FROM history_claimable_balances +// WHERE IN ($1, $2, ...) ORDER BY id asc FOR UPDATE +// ) DELETE FROM history_claimable_balances WHERE id IN ( +// SELECT e1.id as id FROM ha_batch e1 // WHERE NOT EXISTS (SELECT 1 FROM history_transaction_claimable_balances WHERE history_transaction_claimable_balances.history_claimable_balance_id = id limit 1) // AND NOT EXISTS (SELECT 1 FROM history_operation_claimable_balances WHERE history_operation_claimable_balances.history_claimable_balance_id = id limit 1) // ) // -// In short it checks the 1000 rows omitting 1000 row of history_claimable_balances +// It checks each of the candidate rows provided in the top level IN clause // and counts occurrences of each row in corresponding history tables. // If there are no history rows for a given id, the row in // history_claimable_balances is removed. // -// The offset param should be increased before each execution. Given that -// some rows will be removed and some will be added by ingestion it's -// possible that rows will be skipped from deletion. But offset is reset -// when it reaches the table size so eventually all orphaned rows are -// deleted. 
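With ReapLookupTables split into FindLookupTableRowsToReap and ReapLookupTable, the caller now drives the loop one table at a time instead of reaping everything inside the ingestion transaction. A sketch of a single pass, mirroring the helper used by reap_test.go later in this patch; the function name and batch-size handling are illustrative, and acquiring TryLookupTableReaperLock is left to the real reaper:

```go
package history

import "context"

// reapOnePass runs one find-then-delete cycle over a single lookup table.
func reapOnePass(ctx context.Context, q *Q, table string, batchSize int) (int64, error) {
	// Select up to batchSize candidate ids starting at the persisted offset,
	// together with the offset to store for the next pass.
	ids, newOffset, err := q.FindLookupTableRowsToReap(ctx, table, batchSize)
	if err != nil {
		return 0, err
	}

	// ReapLookupTable opens its own transaction, persists the new offset and
	// deletes whichever candidates are still orphaned. The SELECT ... FOR UPDATE
	// inside it blocks on rows that a concurrent ingestion transaction has
	// locked with FOR KEY SHARE.
	return q.ReapLookupTable(ctx, table, ids, newOffset)
}
```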
-func constructReapLookupTablesQuery(table string, historyTables []tableObjectFieldPair, batchSize int, offset int64) string { +// Note that the rows are locked using via SELECT FOR UPDATE. The reason +// for that is to maintain safety when ingestion is running concurrently. +// The ingestion loaders will also lock rows from the history lookup tables +// via SELECT FOR KEY SHARE. This will ensure that the reaping transaction +// will block until the ingestion transaction commits (or vice-versa). +func constructDeleteLookupTableRowsQuery(table string, ids []int64) string { + var conditions []string + for _, referencedTable := range historyLookupTables[table] { + conditions = append( + conditions, + fmt.Sprintf( + "NOT EXISTS ( SELECT 1 as row FROM %s WHERE %s.%s = id LIMIT 1)", + referencedTable.name, + referencedTable.name, referencedTable.objectField, + ), + ) + } + + stringIds := make([]string, len(ids)) + for i, id := range ids { + stringIds[i] = strconv.FormatInt(id, 10) + } + innerQuery := fmt.Sprintf( + "SELECT id FROM %s WHERE id IN (%s) ORDER BY id asc FOR UPDATE", + table, + strings.Join(stringIds, ", "), + ) + + deleteQuery := fmt.Sprintf( + "WITH ha_batch AS (%s) DELETE FROM %s WHERE id IN ("+ + "SELECT e1.id as id FROM ha_batch e1 WHERE %s)", + innerQuery, + table, + strings.Join(conditions, " AND "), + ) + return deleteQuery +} + +func constructFindReapLookupTablesQuery(table string, batchSize int, offset int64) string { var conditions []string - for _, historyTable := range historyTables { + for _, referencedTable := range historyLookupTables[table] { conditions = append( conditions, fmt.Sprintf( "NOT EXISTS ( SELECT 1 as row FROM %s WHERE %s.%s = id LIMIT 1)", - historyTable.name, - historyTable.name, historyTable.objectField, + referencedTable.name, + referencedTable.name, referencedTable.objectField, ), ) } return fmt.Sprintf( - "DELETE FROM %s WHERE id IN ("+ - "WITH ha_batch AS (SELECT id FROM %s WHERE id >= %d ORDER BY id limit %d) "+ + "WITH ha_batch AS (SELECT id FROM %s WHERE id >= %d ORDER BY id ASC limit %d) "+ "SELECT e1.id as id FROM ha_batch e1 WHERE ", table, - table, offset, batchSize, - ) + strings.Join(conditions, " AND ") + ")" + ) + strings.Join(conditions, " AND ") } // DeleteRangeAll deletes a range of rows from all history tables between diff --git a/services/horizon/internal/db2/history/main_test.go b/services/horizon/internal/db2/history/main_test.go index 1a28b9e584..a86f4c14a4 100644 --- a/services/horizon/internal/db2/history/main_test.go +++ b/services/horizon/internal/db2/history/main_test.go @@ -69,20 +69,34 @@ func TestElderLedger(t *testing.T) { } } +func TestConstructDeleteLookupTableRowsQuery(t *testing.T) { + query := constructDeleteLookupTableRowsQuery( + "history_accounts", + []int64{100, 20, 30}, + ) + + assert.Equal(t, + "WITH ha_batch AS (SELECT id FROM history_accounts WHERE id IN (100, 20, 30) ORDER BY id asc FOR UPDATE) "+ + "DELETE FROM history_accounts WHERE id IN (SELECT e1.id as id FROM ha_batch e1 "+ + "WHERE NOT EXISTS ( SELECT 1 as row FROM history_transaction_participants WHERE history_transaction_participants.history_account_id = id LIMIT 1) "+ + "AND NOT EXISTS ( SELECT 1 as row FROM history_effects WHERE history_effects.history_account_id = id LIMIT 1) "+ + "AND NOT EXISTS ( SELECT 1 as row FROM history_operation_participants WHERE history_operation_participants.history_account_id = id LIMIT 1) "+ + "AND NOT EXISTS ( SELECT 1 as row FROM history_trades WHERE history_trades.base_account_id = id LIMIT 1) "+ + "AND NOT 
EXISTS ( SELECT 1 as row FROM history_trades WHERE history_trades.counter_account_id = id LIMIT 1))", query) +} + func TestConstructReapLookupTablesQuery(t *testing.T) { - query := constructReapLookupTablesQuery( + query := constructFindReapLookupTablesQuery( "history_accounts", - historyLookupTables["history_accounts"], 10, 0, ) assert.Equal(t, - "DELETE FROM history_accounts WHERE id IN ("+ - "WITH ha_batch AS (SELECT id FROM history_accounts WHERE id >= 0 ORDER BY id limit 10) SELECT e1.id as id FROM ha_batch e1 "+ + "WITH ha_batch AS (SELECT id FROM history_accounts WHERE id >= 0 ORDER BY id ASC limit 10) SELECT e1.id as id FROM ha_batch e1 "+ "WHERE NOT EXISTS ( SELECT 1 as row FROM history_transaction_participants WHERE history_transaction_participants.history_account_id = id LIMIT 1) "+ "AND NOT EXISTS ( SELECT 1 as row FROM history_effects WHERE history_effects.history_account_id = id LIMIT 1) "+ "AND NOT EXISTS ( SELECT 1 as row FROM history_operation_participants WHERE history_operation_participants.history_account_id = id LIMIT 1) "+ "AND NOT EXISTS ( SELECT 1 as row FROM history_trades WHERE history_trades.base_account_id = id LIMIT 1) "+ - "AND NOT EXISTS ( SELECT 1 as row FROM history_trades WHERE history_trades.counter_account_id = id LIMIT 1))", query) + "AND NOT EXISTS ( SELECT 1 as row FROM history_trades WHERE history_trades.counter_account_id = id LIMIT 1)", query) } diff --git a/services/horizon/internal/db2/history/operation_participant_batch_insert_builder_test.go b/services/horizon/internal/db2/history/operation_participant_batch_insert_builder_test.go index 7e823064f2..eb30fe6659 100644 --- a/services/horizon/internal/db2/history/operation_participant_batch_insert_builder_test.go +++ b/services/horizon/internal/db2/history/operation_participant_batch_insert_builder_test.go @@ -15,7 +15,7 @@ func TestAddOperationParticipants(t *testing.T) { test.ResetHorizonDB(t, tt.HorizonDB) q := &Q{tt.HorizonSession()} - accountLoader := NewAccountLoader() + accountLoader := NewAccountLoader(ConcurrentInserts) address := keypair.MustRandom().Address() tt.Assert.NoError(q.Begin(tt.Ctx)) builder := q.NewOperationParticipantBatchInsertBuilder() diff --git a/services/horizon/internal/db2/history/operation_test.go b/services/horizon/internal/db2/history/operation_test.go index 1d20a9cb10..8899bfcdf6 100644 --- a/services/horizon/internal/db2/history/operation_test.go +++ b/services/horizon/internal/db2/history/operation_test.go @@ -125,7 +125,7 @@ func TestOperationByLiquidityPool(t *testing.T) { // Insert Liquidity Pool history liquidityPoolID := "a2f38836a839de008cf1d782c81f45e1253cc5d3dad9110b872965484fec0a49" - lpLoader := NewLiquidityPoolLoader() + lpLoader := NewLiquidityPoolLoader(ConcurrentInserts) lpOperationBuilder := q.NewOperationLiquidityPoolBatchInsertBuilder() tt.Assert.NoError(lpOperationBuilder.Add(opID1, lpLoader.GetFuture(liquidityPoolID))) diff --git a/services/horizon/internal/db2/history/participants_test.go b/services/horizon/internal/db2/history/participants_test.go index 37f7654abb..15d09da0ac 100644 --- a/services/horizon/internal/db2/history/participants_test.go +++ b/services/horizon/internal/db2/history/participants_test.go @@ -35,7 +35,7 @@ func TestTransactionParticipantsBatch(t *testing.T) { q := &Q{tt.HorizonSession()} batch := q.NewTransactionParticipantsBatchInsertBuilder() - accountLoader := NewAccountLoader() + accountLoader := NewAccountLoader(ConcurrentInserts) transactionID := int64(1) otherTransactionID := int64(2) diff --git 
a/services/horizon/internal/db2/history/reap_test.go b/services/horizon/internal/db2/history/reap_test.go index 5601cd19b6..af20fbc976 100644 --- a/services/horizon/internal/db2/history/reap_test.go +++ b/services/horizon/internal/db2/history/reap_test.go @@ -1,13 +1,43 @@ package history_test import ( + "context" "testing" + "github.com/stretchr/testify/assert" + "github.com/stellar/go/services/horizon/internal/db2/history" "github.com/stellar/go/services/horizon/internal/ingest" "github.com/stellar/go/services/horizon/internal/test" ) +type reapResult struct { + Offset int64 + RowsDeleted int64 +} + +func reapLookupTables(t *testing.T, q *history.Q, batchSize int) map[string]reapResult { + results := map[string]reapResult{} + + for _, table := range []string{ + "history_accounts", + "history_assets", + "history_claimable_balances", + "history_liquidity_pools", + } { + ids, offset, err := q.FindLookupTableRowsToReap(context.Background(), table, batchSize) + assert.NoError(t, err) + rowsDeleted, err := q.ReapLookupTable(context.Background(), table, ids, offset) + assert.NoError(t, err) + results[table] = reapResult{ + Offset: offset, + RowsDeleted: rowsDeleted, + } + } + + return results +} + func TestReapLookupTables(t *testing.T) { tt := test.Start(t) defer tt.Finish() @@ -46,14 +76,7 @@ func TestReapLookupTables(t *testing.T) { q := &history.Q{tt.HorizonSession()} - err = q.Begin(tt.Ctx) - tt.Require.NoError(err) - - results, err := q.ReapLookupTables(tt.Ctx, 5) - tt.Require.NoError(err) - - err = q.Commit() - tt.Require.NoError(err) + results := reapLookupTables(t, q, 5) err = db.GetRaw(tt.Ctx, &curLedgers, `SELECT COUNT(*) FROM history_ledgers`) tt.Require.NoError(err) @@ -91,14 +114,7 @@ func TestReapLookupTables(t *testing.T) { tt.Assert.Equal(int64(0), results["history_claimable_balances"].Offset) tt.Assert.Equal(int64(0), results["history_liquidity_pools"].Offset) - err = q.Begin(tt.Ctx) - tt.Require.NoError(err) - - results, err = q.ReapLookupTables(tt.Ctx, 5) - tt.Require.NoError(err) - - err = q.Commit() - tt.Require.NoError(err) + results = reapLookupTables(t, q, 5) err = db.GetRaw(tt.Ctx, &curAccounts, `SELECT COUNT(*) FROM history_accounts`) tt.Require.NoError(err) @@ -121,14 +137,7 @@ func TestReapLookupTables(t *testing.T) { tt.Assert.Equal(int64(0), results["history_claimable_balances"].Offset) tt.Assert.Equal(int64(0), results["history_liquidity_pools"].Offset) - err = q.Begin(tt.Ctx) - tt.Require.NoError(err) - - results, err = q.ReapLookupTables(tt.Ctx, 1000) - tt.Require.NoError(err) - - err = q.Commit() - tt.Require.NoError(err) + results = reapLookupTables(t, q, 1000) err = db.GetRaw(tt.Ctx, &curAccounts, `SELECT COUNT(*) FROM history_accounts`) tt.Require.NoError(err) diff --git a/services/horizon/internal/db2/history/transaction_test.go b/services/horizon/internal/db2/history/transaction_test.go index 65c6734644..bd8cb1673c 100644 --- a/services/horizon/internal/db2/history/transaction_test.go +++ b/services/horizon/internal/db2/history/transaction_test.go @@ -79,7 +79,7 @@ func TestTransactionByLiquidityPool(t *testing.T) { // Insert Liquidity Pool history liquidityPoolID := "a2f38836a839de008cf1d782c81f45e1253cc5d3dad9110b872965484fec0a49" - lpLoader := NewLiquidityPoolLoader() + lpLoader := NewLiquidityPoolLoader(ConcurrentInserts) lpTransactionBuilder := q.NewTransactionLiquidityPoolBatchInsertBuilder() tt.Assert.NoError(lpTransactionBuilder.Add(txID, lpLoader.GetFuture(liquidityPoolID))) tt.Assert.NoError(lpLoader.Exec(tt.Ctx, q)) @@ -940,15 +940,15 @@ func 
TestTransactionQueryBuilder(t *testing.T) { tt.Assert.NoError(q.Begin(tt.Ctx)) address := "GBXGQJWVLWOYHFLVTKWV5FGHA3LNYY2JQKM7OAJAUEQFU6LPCSEFVXON" - accountLoader := NewAccountLoader() + accountLoader := NewAccountLoader(ConcurrentInserts) accountLoader.GetFuture(address) cbID := "00000000178826fbfe339e1f5c53417c6fedfe2c05e8bec14303143ec46b38981b09c3f9" - cbLoader := NewClaimableBalanceLoader() + cbLoader := NewClaimableBalanceLoader(ConcurrentInserts) cbLoader.GetFuture(cbID) lpID := "0000a8198b5e25994c1ca5b0556faeb27325ac746296944144e0a7406d501e8a" - lpLoader := NewLiquidityPoolLoader() + lpLoader := NewLiquidityPoolLoader(ConcurrentInserts) lpLoader.GetFuture(lpID) tt.Assert.NoError(accountLoader.Exec(tt.Ctx, q)) diff --git a/services/horizon/internal/db2/history/verify_lock.go b/services/horizon/internal/db2/history/verify_lock.go index 29bc11a473..4d7d1fbde7 100644 --- a/services/horizon/internal/db2/history/verify_lock.go +++ b/services/horizon/internal/db2/history/verify_lock.go @@ -13,9 +13,13 @@ const ( // all ingesting nodes use the same value which is why it's hard coded here. stateVerificationLockId = 73897213 // reaperLockId is the objid for the advisory lock acquired during - // reaping. The value is arbitrary. The only requirement is that + // reaping of history tables. The value is arbitrary. The only requirement is that // all ingesting nodes use the same value which is why it's hard coded here. reaperLockId = 944670730 + // lookupTableReaperLockId is the objid for the advisory lock acquired during + // reaping of lookup tables. The value is arbitrary. The only requirement is that + // all ingesting nodes use the same value which is why it's hard coded here. + lookupTableReaperLockId = 329518896 ) // TryStateVerificationLock attempts to acquire the state verification lock @@ -34,6 +38,10 @@ func (q *Q) TryReaperLock(ctx context.Context) (bool, error) { return q.tryAdvisoryLock(ctx, reaperLockId) } +func (q *Q) TryLookupTableReaperLock(ctx context.Context) (bool, error) { + return q.tryAdvisoryLock(ctx, lookupTableReaperLockId) +} + func (q *Q) tryAdvisoryLock(ctx context.Context, lockId int) (bool, error) { if tx := q.GetTx(); tx == nil { return false, errors.New("cannot be called outside of a transaction") diff --git a/services/horizon/internal/docs/GUIDE_FOR_DEVELOPERS.md b/services/horizon/internal/docs/GUIDE_FOR_DEVELOPERS.md index cdbc1b97c7..9464aea2a5 100644 --- a/services/horizon/internal/docs/GUIDE_FOR_DEVELOPERS.md +++ b/services/horizon/internal/docs/GUIDE_FOR_DEVELOPERS.md @@ -171,14 +171,14 @@ By default, the Docker Compose file is configured to use version 21 of Protocol ```bash export PROTOCOL_VERSION="21" export CORE_IMAGE="stellar/stellar-core:21" -export STELLAR_CORE_VERSION="21.0.0-1872.c6f474133.focal" +export STELLAR_CORE_VERSION="21.3.1-2007.4ede19620.focal" ``` Example: Runs Stellar Protocol and Core version 21, for any mode of testnet, standalone, pubnet ```bash -PROTOCOL_VERSION=21 CORE_IMAGE=stellar/stellar-core:21 STELLAR_CORE_VERSION=21.0.0-1872.c6f474133.focal ./start.sh [standalone|pubnet] +PROTOCOL_VERSION=21 CORE_IMAGE=stellar/stellar-core:21 STELLAR_CORE_VERSION=21.3.1-2007.4ede19620.focal ./start.sh [standalone|pubnet] ``` ## **Logging** diff --git a/services/horizon/internal/docs/TESTING_NOTES.md b/services/horizon/internal/docs/TESTING_NOTES.md index c7db9cd465..4dcc5ce6f2 100644 --- a/services/horizon/internal/docs/TESTING_NOTES.md +++ b/services/horizon/internal/docs/TESTING_NOTES.md @@ -16,7 +16,7 @@ Before running integration 
tests, you also need to set some environment variable ```bash export HORIZON_INTEGRATION_TESTS_ENABLED=true export HORIZON_INTEGRATION_TESTS_CORE_MAX_SUPPORTED_PROTOCOL=21 -export HORIZON_INTEGRATION_TESTS_DOCKER_IMG=stellar/stellar-core:21.0.0-1872.c6f474133.focal +export HORIZON_INTEGRATION_TESTS_DOCKER_IMG=stellar/stellar-core:21.3.1-2007.4ede19620.focal ``` Make sure to check [horizon.yml](/.github/workflows/horizon.yml) for the latest core image version. diff --git a/services/horizon/internal/ingest/main.go b/services/horizon/internal/ingest/main.go index 64e4558723..63ee7ba457 100644 --- a/services/horizon/internal/ingest/main.go +++ b/services/horizon/internal/ingest/main.go @@ -70,16 +70,15 @@ const ( // * Ledger ingestion, // * State verifications, // * Metrics updates. - // * Reaping (requires 2 connections, the extra connection is used for holding the advisory lock) - MaxDBConnections = 5 + // * Reaping of history (requires 2 connections, the extra connection is used for holding the advisory lock) + // * Reaping of lookup tables (requires 2 connections, the extra connection is used for holding the advisory lock) + MaxDBConnections = 7 stateVerificationErrorThreshold = 3 // 100 ledgers per flush has shown in stress tests // to be best point on performance curve, default to that. MaxLedgersPerFlush uint32 = 100 - - reapLookupTablesBatchSize = 1000 ) var log = logpkg.DefaultLogger.WithField("service", "ingest") @@ -172,9 +171,6 @@ type Metrics struct { // duration of rebuilding trade aggregation buckets. LedgerIngestionTradeAggregationDuration prometheus.Summary - ReapDurationByLookupTable *prometheus.SummaryVec - RowsReapedByLookupTable *prometheus.SummaryVec - // StateVerifyDuration exposes timing metrics about the rate and // duration of state verification. 
StateVerifyDuration prometheus.Summary @@ -256,7 +252,8 @@ type system struct { maxLedgerPerFlush uint32 - reaper *Reaper + reaper *Reaper + lookupTableReaper *lookupTableReaper currentStateMutex sync.Mutex currentState State @@ -369,6 +366,7 @@ func NewSystem(config Config) (System, error) { config.ReapConfig, config.HistorySession, ), + lookupTableReaper: newLookupTableReaper(config.HistorySession), } system.initMetrics() @@ -409,18 +407,6 @@ func (s *system) initMetrics() { Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }) - s.metrics.ReapDurationByLookupTable = prometheus.NewSummaryVec(prometheus.SummaryOpts{ - Namespace: "horizon", Subsystem: "ingest", Name: "reap_lookup_tables_duration_seconds", - Help: "reap lookup tables durations, sliding window = 10m", - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, - }, []string{"table"}) - - s.metrics.RowsReapedByLookupTable = prometheus.NewSummaryVec(prometheus.SummaryOpts{ - Namespace: "horizon", Subsystem: "ingest", Name: "reap_lookup_tables_rows_reaped", - Help: "rows deleted during lookup tables reap, sliding window = 10m", - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, - }, []string{"table"}) - s.metrics.StateVerifyDuration = prometheus.NewSummary(prometheus.SummaryOpts{ Namespace: "horizon", Subsystem: "ingest", Name: "state_verify_duration_seconds", Help: "state verification durations, sliding window = 10m", @@ -538,8 +524,6 @@ func (s *system) RegisterMetrics(registry *prometheus.Registry) { registry.MustRegister(s.metrics.LocalLatestLedger) registry.MustRegister(s.metrics.LedgerIngestionDuration) registry.MustRegister(s.metrics.LedgerIngestionTradeAggregationDuration) - registry.MustRegister(s.metrics.ReapDurationByLookupTable) - registry.MustRegister(s.metrics.RowsReapedByLookupTable) registry.MustRegister(s.metrics.StateVerifyDuration) registry.MustRegister(s.metrics.StateInvalidGauge) registry.MustRegister(s.metrics.LedgerStatsCounter) @@ -552,6 +536,7 @@ func (s *system) RegisterMetrics(registry *prometheus.Registry) { registry.MustRegister(s.metrics.IngestionErrorCounter) s.ledgerBackend = ledgerbackend.WithMetrics(s.ledgerBackend, registry, "horizon") s.reaper.RegisterMetrics(registry) + s.lookupTableReaper.RegisterMetrics(registry) } // Run starts ingestion system. Ingestion system supports distributed ingestion @@ -822,55 +807,11 @@ func (s *system) maybeReapLookupTables(lastIngestedLedger uint32) { return } - err = s.historyQ.Begin(s.ctx) - if err != nil { - log.WithError(err).Error("Error starting a transaction") - return - } - defer s.historyQ.Rollback() - - // If so block ingestion in the cluster to reap tables - _, err = s.historyQ.GetLastLedgerIngest(s.ctx) - if err != nil { - log.WithError(err).Error(getLastIngestedErrMsg) - return - } - - // Make sure reaping will not take more than 5s, which is average ledger - // closing time. 
- ctx, cancel := context.WithTimeout(s.ctx, 5*time.Second) - defer cancel() - - reapStart := time.Now() - results, err := s.historyQ.ReapLookupTables(ctx, reapLookupTablesBatchSize) - if err != nil { - log.WithError(err).Warn("Error reaping lookup tables") - return - } - - err = s.historyQ.Commit() - if err != nil { - log.WithError(err).Error("Error committing a transaction") - return - } - - totalDeleted := int64(0) - reapLog := log - for table, result := range results { - totalDeleted += result.RowsDeleted - reapLog = reapLog.WithField(table+"_offset", result.Offset) - reapLog = reapLog.WithField(table+"_duration", result.Duration) - reapLog = reapLog.WithField(table+"_rows_deleted", result.RowsDeleted) - s.Metrics().RowsReapedByLookupTable.With(prometheus.Labels{"table": table}).Observe(float64(result.RowsDeleted)) - s.Metrics().ReapDurationByLookupTable.With(prometheus.Labels{"table": table}).Observe(result.Duration.Seconds()) - } - - if totalDeleted > 0 { - reapLog.Info("Reaper deleted rows from lookup tables") - } - - s.Metrics().RowsReapedByLookupTable.With(prometheus.Labels{"table": "total"}).Observe(float64(totalDeleted)) - s.Metrics().ReapDurationByLookupTable.With(prometheus.Labels{"table": "total"}).Observe(time.Since(reapStart).Seconds()) + s.wg.Add(1) + go func() { + defer s.wg.Done() + s.lookupTableReaper.deleteOrphanedRows(s.ctx) + }() } func (s *system) incrementStateVerificationErrors() int { diff --git a/services/horizon/internal/ingest/main_test.go b/services/horizon/internal/ingest/main_test.go index d5733ee5e4..470039fd92 100644 --- a/services/horizon/internal/ingest/main_test.go +++ b/services/horizon/internal/ingest/main_test.go @@ -462,6 +462,11 @@ func (m *mockDBQ) TryReaperLock(ctx context.Context) (bool, error) { return args.Get(0).(bool), args.Error(1) } +func (m *mockDBQ) TryLookupTableReaperLock(ctx context.Context) (bool, error) { + args := m.Called(ctx) + return args.Get(0).(bool), args.Error(1) +} + func (m *mockDBQ) GetNextLedgerSequence(ctx context.Context, start uint32) (uint32, bool, error) { args := m.Called(ctx, start) return args.Get(0).(uint32), args.Get(1).(bool), args.Error(2) @@ -562,13 +567,14 @@ func (m *mockDBQ) NewTradeBatchInsertBuilder() history.TradeBatchInsertBuilder { return args.Get(0).(history.TradeBatchInsertBuilder) } -func (m *mockDBQ) ReapLookupTables(ctx context.Context, batchSize int) (map[string]history.LookupTableReapResult, error) { - args := m.Called(ctx, batchSize) - var r1 map[string]history.LookupTableReapResult - if args.Get(0) != nil { - r1 = args.Get(0).(map[string]history.LookupTableReapResult) - } - return r1, args.Error(2) +func (m *mockDBQ) FindLookupTableRowsToReap(ctx context.Context, table string, batchSize int) ([]int64, int64, error) { + args := m.Called(ctx, table, batchSize) + return args.Get(0).([]int64), args.Get(1).(int64), args.Error(2) +} + +func (m *mockDBQ) ReapLookupTable(ctx context.Context, table string, ids []int64, offset int64) (int64, error) { + args := m.Called(ctx, table, ids, offset) + return args.Get(0).(int64), args.Error(1) } func (m *mockDBQ) RebuildTradeAggregationTimes(ctx context.Context, from, to strtime.Millis, roundingSlippageFilter int) error { diff --git a/services/horizon/internal/ingest/processor_runner.go b/services/horizon/internal/ingest/processor_runner.go index e6f0e0cf74..75b8645953 100644 --- a/services/horizon/internal/ingest/processor_runner.go +++ b/services/horizon/internal/ingest/processor_runner.go @@ -133,11 +133,11 @@ func buildChangeProcessor( }) } -func (s 
*ProcessorRunner) buildTransactionProcessor(ledgersProcessor *processors.LedgersProcessor) (groupLoaders, *groupTransactionProcessors) { - accountLoader := history.NewAccountLoader() - assetLoader := history.NewAssetLoader() - lpLoader := history.NewLiquidityPoolLoader() - cbLoader := history.NewClaimableBalanceLoader() +func (s *ProcessorRunner) buildTransactionProcessor(ledgersProcessor *processors.LedgersProcessor, concurrencyMode history.ConcurrencyMode) (groupLoaders, *groupTransactionProcessors) { + accountLoader := history.NewAccountLoader(concurrencyMode) + assetLoader := history.NewAssetLoader(concurrencyMode) + lpLoader := history.NewLiquidityPoolLoader(concurrencyMode) + cbLoader := history.NewClaimableBalanceLoader(concurrencyMode) loaders := newGroupLoaders([]horizonLazyLoader{accountLoader, assetLoader, lpLoader, cbLoader}) statsLedgerTransactionProcessor := processors.NewStatsLedgerTransactionProcessor() @@ -366,7 +366,7 @@ func (s *ProcessorRunner) streamLedger(ledger xdr.LedgerCloseMeta, return nil } -func (s *ProcessorRunner) runTransactionProcessorsOnLedger(registry nameRegistry, ledger xdr.LedgerCloseMeta) ( +func (s *ProcessorRunner) runTransactionProcessorsOnLedger(registry nameRegistry, ledger xdr.LedgerCloseMeta, concurrencyMode history.ConcurrencyMode) ( transactionStats processors.StatsLedgerTransactionProcessorResults, transactionDurations runDurations, tradeStats processors.TradeStats, @@ -381,7 +381,7 @@ func (s *ProcessorRunner) runTransactionProcessorsOnLedger(registry nameRegistry groupTransactionFilterers := s.buildTransactionFilterer() // when in online mode, the submission result processor must always run (regardless of whether filter rules exist or not) groupFilteredOutProcessors := s.buildFilteredOutProcessor() - loaders, groupTransactionProcessors := s.buildTransactionProcessor(ledgersProcessor) + loaders, groupTransactionProcessors := s.buildTransactionProcessor(ledgersProcessor, concurrencyMode) if err = registerTransactionProcessors( registry, @@ -494,7 +494,7 @@ func (s *ProcessorRunner) RunTransactionProcessorsOnLedgers(ledgers []xdr.Ledger groupTransactionFilterers := s.buildTransactionFilterer() // intentionally skip filtered out processor groupFilteredOutProcessors := newGroupTransactionProcessors(nil, nil, nil) - loaders, groupTransactionProcessors := s.buildTransactionProcessor(ledgersProcessor) + loaders, groupTransactionProcessors := s.buildTransactionProcessor(ledgersProcessor, history.ConcurrentInserts) startTime := time.Now() curHeap, sysHeap := getMemStats() @@ -611,7 +611,7 @@ func (s *ProcessorRunner) RunAllProcessorsOnLedger(ledger xdr.LedgerCloseMeta) ( return } - transactionStats, transactionDurations, tradeStats, loaderDurations, loaderStats, err := s.runTransactionProcessorsOnLedger(registry, ledger) + transactionStats, transactionDurations, tradeStats, loaderDurations, loaderStats, err := s.runTransactionProcessorsOnLedger(registry, ledger, history.ConcurrentDeletes) stats.changeStats = changeStatsProcessor.GetResults() stats.changeDurations = groupChangeProcessors.processorsRunDurations diff --git a/services/horizon/internal/ingest/processor_runner_test.go b/services/horizon/internal/ingest/processor_runner_test.go index e6ce6b512c..82c712b737 100644 --- a/services/horizon/internal/ingest/processor_runner_test.go +++ b/services/horizon/internal/ingest/processor_runner_test.go @@ -248,7 +248,7 @@ func TestProcessorRunnerBuildTransactionProcessor(t *testing.T) { ledgersProcessor := &processors.LedgersProcessor{} - _, processor := 
runner.buildTransactionProcessor(ledgersProcessor) + _, processor := runner.buildTransactionProcessor(ledgersProcessor, history.ConcurrentInserts) assert.IsType(t, &groupTransactionProcessors{}, processor) assert.IsType(t, &processors.StatsLedgerTransactionProcessor{}, processor.processors[0]) assert.IsType(t, &processors.EffectProcessor{}, processor.processors[1]) diff --git a/services/horizon/internal/ingest/processors/claimable_balances_transaction_processor_test.go b/services/horizon/internal/ingest/processors/claimable_balances_transaction_processor_test.go index ca918e08ea..5967ef41b1 100644 --- a/services/horizon/internal/ingest/processors/claimable_balances_transaction_processor_test.go +++ b/services/horizon/internal/ingest/processors/claimable_balances_transaction_processor_test.go @@ -44,7 +44,7 @@ func (s *ClaimableBalancesTransactionProcessorTestSuiteLedger) SetupTest() { }, }, } - s.cbLoader = history.NewClaimableBalanceLoader() + s.cbLoader = history.NewClaimableBalanceLoader(history.ConcurrentInserts) s.processor = NewClaimableBalancesTransactionProcessor( s.cbLoader, diff --git a/services/horizon/internal/ingest/processors/effects_processor_test.go b/services/horizon/internal/ingest/processors/effects_processor_test.go index 0243768fde..276f6fcb03 100644 --- a/services/horizon/internal/ingest/processors/effects_processor_test.go +++ b/services/horizon/internal/ingest/processors/effects_processor_test.go @@ -7,11 +7,11 @@ import ( "crypto/rand" "encoding/hex" "encoding/json" + "math/big" + "strings" "testing" "github.com/guregu/null" - "math/big" - "strings" "github.com/stellar/go/keypair" "github.com/stellar/go/protocols/horizon/base" @@ -62,7 +62,7 @@ func TestEffectsProcessorTestSuiteLedger(t *testing.T) { func (s *EffectsProcessorTestSuiteLedger) SetupTest() { s.ctx = context.Background() - s.accountLoader = history.NewAccountLoader() + s.accountLoader = history.NewAccountLoader(history.ConcurrentInserts) s.mockBatchInsertBuilder = &history.MockEffectBatchInsertBuilder{} s.lcm = xdr.LedgerCloseMeta{ @@ -446,7 +446,7 @@ func TestEffectsCoversAllOperationTypes(t *testing.T) { } assert.True(t, err2 != nil || err == nil, s) }() - err = operation.ingestEffects(history.NewAccountLoader(), &history.MockEffectBatchInsertBuilder{}) + err = operation.ingestEffects(history.NewAccountLoader(history.ConcurrentInserts), &history.MockEffectBatchInsertBuilder{}) }() } @@ -468,7 +468,7 @@ func TestEffectsCoversAllOperationTypes(t *testing.T) { ledgerSequence: 1, } // calling effects should error due to the unknown operation - err := operation.ingestEffects(history.NewAccountLoader(), &history.MockEffectBatchInsertBuilder{}) + err := operation.ingestEffects(history.NewAccountLoader(history.ConcurrentInserts), &history.MockEffectBatchInsertBuilder{}) assert.Contains(t, err.Error(), "Unknown operation type") } @@ -2558,7 +2558,7 @@ type effect struct { } func assertIngestEffects(t *testing.T, operation transactionOperationWrapper, expected []effect) { - accountLoader := history.NewAccountLoader() + accountLoader := history.NewAccountLoader(history.ConcurrentInserts) mockBatchInsertBuilder := &history.MockEffectBatchInsertBuilder{} for _, expectedEffect := range expected { diff --git a/services/horizon/internal/ingest/processors/liquidity_pools_transaction_processor_test.go b/services/horizon/internal/ingest/processors/liquidity_pools_transaction_processor_test.go index 8d08e44d44..cdafc5bcc3 100644 --- 
a/services/horizon/internal/ingest/processors/liquidity_pools_transaction_processor_test.go +++ b/services/horizon/internal/ingest/processors/liquidity_pools_transaction_processor_test.go @@ -44,7 +44,7 @@ func (s *LiquidityPoolsTransactionProcessorTestSuiteLedger) SetupTest() { }, }, } - s.lpLoader = history.NewLiquidityPoolLoader() + s.lpLoader = history.NewLiquidityPoolLoader(history.ConcurrentInserts) s.processor = NewLiquidityPoolsTransactionProcessor( s.lpLoader, diff --git a/services/horizon/internal/ingest/processors/participants_processor_test.go b/services/horizon/internal/ingest/processors/participants_processor_test.go index b81bd22f67..f6154b2b39 100644 --- a/services/horizon/internal/ingest/processors/participants_processor_test.go +++ b/services/horizon/internal/ingest/processors/participants_processor_test.go @@ -86,7 +86,7 @@ func (s *ParticipantsProcessorTestSuiteLedger) SetupTest() { s.thirdTx.Envelope.V1.Tx.SourceAccount = aid.ToMuxedAccount() s.thirdTxID = toid.New(int32(sequence), 3, 0).ToInt64() - s.accountLoader = history.NewAccountLoader() + s.accountLoader = history.NewAccountLoader(history.ConcurrentInserts) s.addressToFuture = map[string]history.FutureAccountID{} for _, address := range s.addresses { s.addressToFuture[address] = s.accountLoader.GetFuture(address) diff --git a/services/horizon/internal/ingest/reap.go b/services/horizon/internal/ingest/reap.go index 07a61a4cde..63b56de993 100644 --- a/services/horizon/internal/ingest/reap.go +++ b/services/horizon/internal/ingest/reap.go @@ -16,6 +16,8 @@ import ( "github.com/stellar/go/toid" ) +const reapLookupTablesBatchSize = 1000 + // Reaper represents the history reaping subsystem of horizon. type Reaper struct { historyQ history.IngestionQ @@ -243,3 +245,117 @@ func (r *Reaper) deleteBatch(ctx context.Context, batchStartSeq, batchEndSeq uin r.deleteBatchDuration.Observe(elapsedSeconds) return count, nil } + +type lookupTableReaper struct { + historyQ history.IngestionQ + reapLockQ history.IngestionQ + pending atomic.Bool + logger *logpkg.Entry + + reapDurationByLookupTable *prometheus.SummaryVec + rowsReapedByLookupTable *prometheus.SummaryVec +} + +func newLookupTableReaper(dbSession db.SessionInterface) *lookupTableReaper { + return &lookupTableReaper{ + historyQ: &history.Q{dbSession.Clone()}, + reapLockQ: &history.Q{dbSession.Clone()}, + pending: atomic.Bool{}, + logger: log.WithField("subservice", "lookuptable-reaper"), + reapDurationByLookupTable: prometheus.NewSummaryVec(prometheus.SummaryOpts{ + Namespace: "horizon", Subsystem: "ingest", Name: "reap_lookup_tables_duration_seconds", + Help: "reap lookup tables durations, sliding window = 10m", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }, []string{"table", "type"}), + rowsReapedByLookupTable: prometheus.NewSummaryVec(prometheus.SummaryOpts{ + Namespace: "horizon", Subsystem: "ingest", Name: "reap_lookup_tables_rows_reaped", + Help: "rows deleted during lookup tables reap, sliding window = 10m", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }, []string{"table"}), + } +} + +func (r *lookupTableReaper) RegisterMetrics(registry *prometheus.Registry) { + registry.MustRegister( + r.reapDurationByLookupTable, + r.rowsReapedByLookupTable, + ) +} + +func (r *lookupTableReaper) deleteOrphanedRows(ctx context.Context) error { + // check if reap is already in progress on this horizon node + if !r.pending.CompareAndSwap(false, true) { + r.logger.Infof("existing reap already in progress, skipping request to 
start a new one") + return nil + } + defer r.pending.Store(false) + + if err := r.reapLockQ.Begin(ctx); err != nil { + return errors.Wrap(err, "error while starting reaper lock transaction") + } + defer func() { + if err := r.reapLockQ.Rollback(); err != nil { + r.logger.WithField("error", err).Error("failed to release reaper lock") + } + }() + // check if reap is already in progress on another horizon node + if acquired, err := r.reapLockQ.TryLookupTableReaperLock(ctx); err != nil { + return errors.Wrap(err, "error while acquiring reaper database lock") + } else if !acquired { + r.logger.Info("reap already in progress on another node") + return nil + } + + reapStart := time.Now() + var totalQueryDuration, totalDeleteDuration time.Duration + var totalDeleted int64 + for _, table := range []string{ + "history_accounts", "history_claimable_balances", + "history_assets", "history_liquidity_pools", + } { + startTime := time.Now() + ids, offset, err := r.historyQ.FindLookupTableRowsToReap(ctx, table, reapLookupTablesBatchSize) + if err != nil { + r.logger.WithField("table", table).WithError(err).Warn("Error finding orphaned rows") + return err + } + queryDuration := time.Since(startTime) + totalQueryDuration += queryDuration + + deleteStartTime := time.Now() + var rowsDeleted int64 + rowsDeleted, err = r.historyQ.ReapLookupTable(ctx, table, ids, offset) + if err != nil { + r.logger.WithField("table", table).WithError(err).Warn("Error deleting orphaned rows") + return err + } + deleteDuration := time.Since(deleteStartTime) + totalDeleteDuration += deleteDuration + + r.rowsReapedByLookupTable.With(prometheus.Labels{"table": table}). + Observe(float64(rowsDeleted)) + r.reapDurationByLookupTable.With(prometheus.Labels{"table": table, "type": "query"}). + Observe(float64(queryDuration.Seconds())) + r.reapDurationByLookupTable.With(prometheus.Labels{"table": table, "type": "delete"}). + Observe(float64(deleteDuration.Seconds())) + r.reapDurationByLookupTable.With(prometheus.Labels{"table": table, "type": "total"}). + Observe(float64((queryDuration + deleteDuration).Seconds())) + + r.logger.WithField("table", table). + WithField("offset", offset). + WithField("rows_deleted", rowsDeleted). + WithField("query_duration", queryDuration.Seconds()). + WithField("delete_duration", deleteDuration.Seconds()). + Info("Reaper deleted rows from lookup tables") + } + + r.rowsReapedByLookupTable.With(prometheus.Labels{"table": "total"}). + Observe(float64(totalDeleted)) + r.reapDurationByLookupTable.With(prometheus.Labels{"table": "total", "type": "query"}). + Observe(float64(totalQueryDuration.Seconds())) + r.reapDurationByLookupTable.With(prometheus.Labels{"table": "total", "type": "delete"}). + Observe(float64(totalDeleteDuration.Seconds())) + r.reapDurationByLookupTable.With(prometheus.Labels{"table": "total", "type": "total"}). 
+ Observe(time.Since(reapStart).Seconds()) + return nil +} diff --git a/services/horizon/internal/ingest/resume_state_test.go b/services/horizon/internal/ingest/resume_state_test.go index feb5e13bb0..534ec555f6 100644 --- a/services/horizon/internal/ingest/resume_state_test.go +++ b/services/horizon/internal/ingest/resume_state_test.go @@ -402,10 +402,6 @@ func (s *ResumeTestTestSuite) TestReapingObjectsDisabled() { } func (s *ResumeTestTestSuite) TestErrorReapingObjectsIgnored() { - s.system.config.ReapLookupTables = true - defer func() { - s.system.config.ReapLookupTables = false - }() s.historyQ.On("Begin", s.ctx).Return(nil).Once() s.historyQ.On("GetLastLedgerIngest", s.ctx).Return(uint32(100), nil).Once() s.historyQ.On("GetIngestVersion", s.ctx).Return(CurrentVersion, nil).Once() @@ -425,12 +421,6 @@ func (s *ResumeTestTestSuite) TestErrorReapingObjectsIgnored() { s.historyQ.On("GetExpStateInvalid", s.ctx).Return(false, nil).Once() s.historyQ.On("RebuildTradeAggregationBuckets", s.ctx, uint32(101), uint32(101), 0).Return(nil).Once() - // Reap lookup tables: - s.ledgerBackend.On("GetLatestLedgerSequence", s.ctx).Return(uint32(101), nil) - s.historyQ.On("Begin", s.ctx).Return(nil).Once() - s.historyQ.On("GetLastLedgerIngest", s.ctx).Return(uint32(100), nil).Once() - s.historyQ.On("ReapLookupTables", mock.AnythingOfType("*context.timerCtx"), mock.Anything).Return(nil, nil, errors.New("error reaping objects")).Once() - s.historyQ.On("Rollback").Return(nil).Once() mockStats := &historyarchive.MockArchiveStats{} mockStats.On("GetBackendName").Return("name") mockStats.On("GetDownloads").Return(uint32(0))