diff --git a/go/vt/vtgate/tabletgateway.go b/go/vt/vtgate/tabletgateway.go
index c36c6981fa2..48cd37b28fe 100644
--- a/go/vt/vtgate/tabletgateway.go
+++ b/go/vt/vtgate/tabletgateway.go
@@ -207,9 +207,8 @@ func (gw *TabletGateway) WaitForTablets(ctx context.Context, tabletTypesToWait [
 		case context.DeadlineExceeded:
 			// In this scenario, we were able to reach the
 			// topology service, but some tablets may not be
-			// ready. We just warn and keep going.
+			// ready.
 			log.Warningf("Timeout waiting for all keyspaces / shards to have healthy tablets of types %v, may be in degraded mode", tabletTypesToWait)
-			err = nil
 		}
 	}()
 
diff --git a/go/vt/vtgate/vtgate.go b/go/vt/vtgate/vtgate.go
index 8b8302d77d4..298f683be16 100644
--- a/go/vt/vtgate/vtgate.go
+++ b/go/vt/vtgate/vtgate.go
@@ -295,8 +295,23 @@ func Init(
 	// TabletGateway can create it's own healthcheck
 	gw := NewTabletGateway(ctx, hc, serv, cell)
 	gw.RegisterStats()
-	if err := gw.WaitForTablets(ctx, tabletTypesToWait); err != nil {
-		log.Fatalf("tabletGateway.WaitForTablets failed: %v", err)
+
+	// Wait for healthy tablets of the requested types. WaitForTablets no
+	// longer swallows context.DeadlineExceeded, so a timed-out wait is
+	// retried here instead of aborting start-up.
+	for {
+		err := gw.WaitForTablets(ctx, tabletTypesToWait)
+		if err == nil {
+			// Tablets are available; carry on with initialization.
+			break
+		}
+		// Only retry while the caller's context is still live; once ctx
+		// itself is done, every further attempt would fail immediately.
+		if errors.Is(err, context.DeadlineExceeded) && ctx.Err() == nil {
+			log.Warning("TabletGateway timed out waiting for tablets to become available - retrying.")
+			continue
+		}
+		log.Fatalf("tabletGateway.WaitForTablets failed: %v", err)
 	}
 
 	dynamicConfig := NewDynamicViperConfig()