Skip to content

Commit

Permalink
cluster: fix tso fallback due raft cluster did not stop tso service (#…
Browse files Browse the repository at this point in the history
…8837) (#8838)

close #8836

Signed-off-by: lhy1024 <admin@liudos.us>

Co-authored-by: lhy1024 <admin@liudos.us>
  • Loading branch information
ti-chi-bot and lhy1024 authored Nov 21, 2024
1 parent fb992f9 commit 3994172
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
14 changes: 11 additions & 3 deletions server/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,8 @@ func (c *RaftCluster) startTSOJobsIfNeeded() error {
log.Error("failed to initialize the global TSO allocator", errs.ZapError(err))
return err
}
} else {
log.Warn("the global TSO allocator is already initialized")
}
return nil
}
Expand Down Expand Up @@ -855,6 +857,15 @@ func (c *RaftCluster) runReplicationMode() {
// Stop stops the cluster.
func (c *RaftCluster) Stop() {
c.Lock()
// We need to try to stop tso jobs whatever the cluster is running or not.
// Because we need to call checkTSOService as soon as possible while the cluster is starting,
// which makes the cluster may not be running but the tso job has been started.
// For example, the cluster meets an error when starting, such as cluster is not bootstrapped.
// In this case, the `running` in `RaftCluster` is false, but the tso job has been started.
// Ref: https://github.com/tikv/pd/issues/8836
if err := c.stopTSOJobsIfNeeded(); err != nil {
log.Error("failed to stop tso jobs", errs.ZapError(err))
}
if !c.running {
c.Unlock()
return
Expand All @@ -864,9 +875,6 @@ func (c *RaftCluster) Stop() {
if !c.IsServiceIndependent(constant.SchedulingServiceName) {
c.stopSchedulingJobs()
}
if err := c.stopTSOJobsIfNeeded(); err != nil {
log.Error("failed to stop tso jobs", errs.ZapError(err))
}
c.heartbeatRunner.Stop()
c.miscRunner.Stop()
c.logRunner.Stop()
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,11 +197,11 @@ func TestLeaderTransferAndMoveCluster(t *testing.T) {
oldServers := cluster.GetServers()
oldLeaderName := cluster.WaitLeader()
for range 3 {
time.Sleep(5 * time.Second)
newPD, err := cluster.Join(ctx)
re.NoError(err)
re.NoError(newPD.Run())
oldLeaderName = cluster.WaitLeader()
time.Sleep(5 * time.Second)
}

// ABCDEF->DEF
Expand Down

0 comments on commit 3994172

Please sign in to comment.