Skip to content

Commit

Permalink
PRS and ERS don't promote replicas taking backups
Browse files Browse the repository at this point in the history
Signed-off-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com>
  • Loading branch information
ejortegau committed Oct 18, 2024
1 parent e881b9f commit b2c0b5c
Show file tree
Hide file tree
Showing 16 changed files with 1,221 additions and 892 deletions.
216 changes: 118 additions & 98 deletions go/vt/proto/replicationdata/replicationdata.pb.go

Large diffs are not rendered by default.

70 changes: 70 additions & 0 deletions go/vt/proto/replicationdata/replicationdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1,566 changes: 793 additions & 773 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata.pb.go

Large diffs are not rendered by default.

68 changes: 68 additions & 0 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ type TabletManagerClient struct {
EventJitter time.Duration
ErrorAfter time.Duration
}
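// BackingUp is keyed by tablet alias. When an entry exists for a tablet, its value is reported as the BackingUp flag of that tablet's ReplicationStatus result.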
BackingUp map[string]bool
// keyed by tablet alias.
ChangeTagsResult map[string]struct {
Response *tabletmanagerdatapb.ChangeTagsResponse
Expand Down Expand Up @@ -1052,6 +1054,9 @@ func (fake *TabletManagerClient) ReplicationStatus(ctx context.Context, tablet *
}

if result, ok := fake.ReplicationStatusResults[key]; ok {
if _, ok = fake.BackingUp[key]; ok {
result.Position.BackingUp = fake.BackingUp[key]
}
return result.Position, result.Error
}

Expand Down
13 changes: 11 additions & 2 deletions go/vt/vtctl/reparentutil/emergency_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
// 2. Remove the tablets with the Must_not promote rule
// 3. Remove cross-cell tablets if PreventCrossCellPromotion is specified
// Our final primary candidate MUST belong to this list of valid candidates
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, prevPrimary, opts)
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, stoppedReplicationSnapshot.backingUpTablets, prevPrimary, opts)
if err != nil {
return err
}
Expand Down Expand Up @@ -729,7 +729,7 @@ func (erp *EmergencyReparenter) identifyPrimaryCandidate(
}

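// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any constraint failures and can make forward progress on being promoted. Tablets marked as running a backup in tabletsBackingUp are removed as well; if the requested new primary is among them, an error is returned.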
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, tabletsBackingUp map[string]bool, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
var restrictedValidTablets []*topodatapb.Tablet
for _, tablet := range validTablets {
tabletAliasStr := topoproto.TabletAliasString(tablet.Alias)
Expand Down Expand Up @@ -757,6 +757,15 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb
}
continue
}
// Remove candidates that are running a backup.
backingUp, ok := tabletsBackingUp[tabletAliasStr]
if ok && backingUp {
erp.logger.Infof("Removing %s from list of valid candidates for promotion because it is running a backup", tabletAliasStr)
if opts.NewPrimaryAlias != nil && topoproto.TabletAliasEqual(opts.NewPrimaryAlias, tablet.Alias) {
return nil, vterrors.Errorf(vtrpc.Code_ABORTED, "proposed primary %s will not be able to make forward progress on being promoted", topoproto.TabletAliasString(opts.NewPrimaryAlias))
}
continue
}
restrictedValidTablets = append(restrictedValidTablets, tablet)
}
return restrictedValidTablets, nil
Expand Down
26 changes: 25 additions & 1 deletion go/vt/vtctl/reparentutil/emergency_reparenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4460,11 +4460,22 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
}
)
allTablets := []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, replicaCrossCellTablet, rdonlyCrossCellTablet}
noTabletsBackingUp := map[string]bool{
topoproto.TabletAliasString(primaryTablet.Alias): false, topoproto.TabletAliasString(replicaTablet.Alias): false,
topoproto.TabletAliasString(rdonlyTablet.Alias): false, topoproto.TabletAliasString(replicaCrossCellTablet.Alias): false,
topoproto.TabletAliasString(rdonlyCrossCellTablet.Alias): false,
}
replicaBackingUp := map[string]bool{
topoproto.TabletAliasString(primaryTablet.Alias): false, topoproto.TabletAliasString(replicaTablet.Alias): true,
topoproto.TabletAliasString(rdonlyTablet.Alias): false, topoproto.TabletAliasString(replicaCrossCellTablet.Alias): false,
topoproto.TabletAliasString(rdonlyCrossCellTablet.Alias): false,
}
tests := []struct {
name string
durability string
validTablets []*topodatapb.Tablet
tabletsReachable []*topodatapb.Tablet
tabletsBackingUp map[string]bool
prevPrimary *topodatapb.Tablet
opts EmergencyReparentOptions
filteredTablets []*topodatapb.Tablet
Expand All @@ -4475,12 +4486,21 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet, replicaCrossCellTablet},
}, {
name: "filter backing up",
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: replicaBackingUp,
filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaCrossCellTablet},
}, {
name: "filter cross cell",
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
prevPrimary: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone-1",
Expand All @@ -4495,6 +4515,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
durability: "cross_cell",
validTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet},
tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
tabletsBackingUp: noTabletsBackingUp,
filteredTablets: nil,
}, {
name: "filter mixed",
Expand All @@ -4509,12 +4530,14 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
},
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
filteredTablets: []*topodatapb.Tablet{replicaCrossCellTablet},
}, {
name: "error - requested primary must not",
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
opts: EmergencyReparentOptions{
NewPrimaryAlias: rdonlyTablet.Alias,
},
Expand All @@ -4535,6 +4558,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
durability: "cross_cell",
validTablets: allTablets,
tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
tabletsBackingUp: noTabletsBackingUp,
opts: EmergencyReparentOptions{
NewPrimaryAlias: primaryTablet.Alias,
},
Expand All @@ -4548,7 +4572,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
tt.opts.durability = durability
logger := logutil.NewMemoryLogger()
erp := NewEmergencyReparenter(nil, nil, logger)
tabletList, err := erp.filterValidCandidates(tt.validTablets, tt.tabletsReachable, tt.prevPrimary, tt.opts)
tabletList, err := erp.filterValidCandidates(tt.validTablets, tt.tabletsReachable, tt.tabletsBackingUp, tt.prevPrimary, tt.opts)
if tt.errShouldContain != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errShouldContain)
Expand Down
Loading

0 comments on commit b2c0b5c

Please sign in to comment.