diff --git a/cmd/allocator/main.go b/cmd/allocator/main.go index f1229b41ba..64b7342b84 100644 --- a/cmd/allocator/main.go +++ b/cmd/allocator/main.go @@ -114,7 +114,7 @@ func main() { return err }) - h := newServiceHandler(kubeClient, agonesClient, health, conf.MTLSDisabled, conf.TLSDisabled, conf.remoteAllocationTimeout, conf.totalRemoteAllocationTimeout) + h := newServiceHandler(kubeClient, agonesClient, health, conf.MTLSDisabled, conf.TLSDisabled, conf.remoteAllocationTimeout, conf.totalRemoteAllocationTimeout, conf.allocationBatchWaitTime) if !h.tlsDisabled { watcherTLS, err := fsnotify.NewWatcher() @@ -280,7 +280,7 @@ func runGRPC(h *serviceHandler, grpcPort int) { }() } -func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.Interface, health healthcheck.Handler, mTLSDisabled bool, tlsDisabled bool, remoteAllocationTimeout time.Duration, totalRemoteAllocationTimeout time.Duration) *serviceHandler { +func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.Interface, health healthcheck.Handler, mTLSDisabled bool, tlsDisabled bool, remoteAllocationTimeout time.Duration, totalRemoteAllocationTimeout time.Duration, allocationBatchWaitTime time.Duration) *serviceHandler { defaultResync := 30 * time.Second agonesInformerFactory := externalversions.NewSharedInformerFactory(agonesClient, defaultResync) kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, defaultResync) @@ -293,7 +293,8 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I kubeClient, gameserverallocations.NewAllocationCache(agonesInformerFactory.Agones().V1().GameServers(), gsCounter, health), remoteAllocationTimeout, - totalRemoteAllocationTimeout) + totalRemoteAllocationTimeout, + allocationBatchWaitTime) ctx := signals.NewSigKillContext() h := serviceHandler{ diff --git a/cmd/allocator/metrics.go b/cmd/allocator/metrics.go index 5c5db38c19..1ca4d488a4 100644 --- a/cmd/allocator/metrics.go +++ b/cmd/allocator/metrics.go @@ -42,6 +42,7 @@ const ( apiServerSustainedQPSFlag = "api-server-qps" apiServerBurstQPSFlag = "api-server-qps-burst" logLevelFlag = "log-level" + allocationBatchWaitTime = "allocation-batch-wait-time" ) func init() { @@ -62,6 +63,7 @@ type config struct { LogLevel string totalRemoteAllocationTimeout time.Duration remoteAllocationTimeout time.Duration + allocationBatchWaitTime time.Duration } func parseEnvFlags() config { @@ -79,6 +81,7 @@ func parseEnvFlags() config { viper.SetDefault(remoteAllocationTimeoutFlag, 10*time.Second) viper.SetDefault(totalRemoteAllocationTimeoutFlag, 30*time.Second) viper.SetDefault(logLevelFlag, "Info") + viper.SetDefault(allocationBatchWaitTime, 500*time.Millisecond) pflag.Int32(httpPortFlag, viper.GetInt32(httpPortFlag), "Port to listen on for REST requests") pflag.Int32(grpcPortFlag, viper.GetInt32(grpcPortFlag), "Port to listen on for gRPC requests") @@ -93,6 +96,7 @@ func parseEnvFlags() config { pflag.Duration(remoteAllocationTimeoutFlag, viper.GetDuration(remoteAllocationTimeoutFlag), "Flag to set remote allocation call timeout.") pflag.Duration(totalRemoteAllocationTimeoutFlag, viper.GetDuration(totalRemoteAllocationTimeoutFlag), "Flag to set total remote allocation timeout including retries.") pflag.String(logLevelFlag, viper.GetString(logLevelFlag), "Agones Log level") + pflag.Duration(allocationBatchWaitTime, viper.GetDuration(allocationBatchWaitTime), "Flag to configure the waiting period between allocations batches") runtime.FeaturesBindFlags() pflag.Parse() @@ -127,6 +131,7 @@ func parseEnvFlags() config { LogLevel: viper.GetString(logLevelFlag), remoteAllocationTimeout: viper.GetDuration(remoteAllocationTimeoutFlag), totalRemoteAllocationTimeout: viper.GetDuration(totalRemoteAllocationTimeoutFlag), + allocationBatchWaitTime: viper.GetDuration(allocationBatchWaitTime), } } diff --git a/cmd/controller/main.go b/cmd/controller/main.go index b2c47baf86..d05361ab23 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -77,6 +77,7 @@ const ( logLevelFlag = "log-level" logSizeLimitMBFlag = "log-size-limit-mb" kubeconfigFlag = "kubeconfig" + allocationBatchWaitTime = "allocation-batch-wait-time" defaultResync = 30 * time.Second ) @@ -210,7 +211,8 @@ func main() { gsSetController := gameserversets.NewController(wh, health, gsCounter, kubeClient, extClient, agonesClient, agonesInformerFactory) fleetController := fleets.NewController(wh, health, kubeClient, extClient, agonesClient, agonesInformerFactory) - gasController := gameserverallocations.NewController(api, health, gsCounter, kubeClient, kubeInformerFactory, agonesClient, agonesInformerFactory, 10*time.Second, 30*time.Second) + gasController := gameserverallocations.NewController(api, health, gsCounter, kubeClient, kubeInformerFactory, + agonesClient, agonesInformerFactory, 10*time.Second, 30*time.Second, ctlConf.AllocationBatchWaitTime) fasController := fleetautoscalers.NewController(wh, health, kubeClient, extClient, agonesClient, agonesInformerFactory) @@ -253,6 +255,7 @@ func parseEnvFlags() config { viper.SetDefault(enablePrometheusMetricsFlag, true) viper.SetDefault(enableStackdriverMetricsFlag, false) viper.SetDefault(stackdriverLabels, "") + viper.SetDefault(allocationBatchWaitTime, 500*time.Millisecond) viper.SetDefault(projectIDFlag, "") viper.SetDefault(numWorkersFlag, 64) @@ -284,6 +287,7 @@ func parseEnvFlags() config { pflag.String(logDirFlag, viper.GetString(logDirFlag), "If set, store logs in a given directory.") pflag.Int32(logSizeLimitMBFlag, 1000, "Log file size limit in MB") pflag.String(logLevelFlag, viper.GetString(logLevelFlag), "Agones Log level") + pflag.Duration(allocationBatchWaitTime, viper.GetDuration(allocationBatchWaitTime), "Flag to configure the waiting period between allocations batches") runtime.FeaturesBindFlags() pflag.Parse() @@ -336,55 +340,57 @@ func parseEnvFlags() config { } return config{ - MinPort: int32(viper.GetInt64(minPortFlag)), - MaxPort: int32(viper.GetInt64(maxPortFlag)), - SidecarImage: viper.GetString(sidecarImageFlag), - SidecarCPURequest: requestCPU, - SidecarCPULimit: limitCPU, - SidecarMemoryRequest: requestMemory, - SidecarMemoryLimit: limitMemory, - SdkServiceAccount: viper.GetString(sdkServerAccountFlag), - AlwaysPullSidecar: viper.GetBool(pullSidecarFlag), - KeyFile: viper.GetString(keyFileFlag), - CertFile: viper.GetString(certFileFlag), - KubeConfig: viper.GetString(kubeconfigFlag), - PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag), - Stackdriver: viper.GetBool(enableStackdriverMetricsFlag), - GCPProjectID: viper.GetString(projectIDFlag), - NumWorkers: int(viper.GetInt32(numWorkersFlag)), - APIServerSustainedQPS: int(viper.GetInt32(apiServerSustainedQPSFlag)), - APIServerBurstQPS: int(viper.GetInt32(apiServerBurstQPSFlag)), - LogDir: viper.GetString(logDirFlag), - LogLevel: viper.GetString(logLevelFlag), - LogSizeLimitMB: int(viper.GetInt32(logSizeLimitMBFlag)), - StackdriverLabels: viper.GetString(stackdriverLabels), + MinPort: int32(viper.GetInt64(minPortFlag)), + MaxPort: int32(viper.GetInt64(maxPortFlag)), + SidecarImage: viper.GetString(sidecarImageFlag), + SidecarCPURequest: requestCPU, + SidecarCPULimit: limitCPU, + SidecarMemoryRequest: requestMemory, + SidecarMemoryLimit: limitMemory, + SdkServiceAccount: viper.GetString(sdkServerAccountFlag), + AlwaysPullSidecar: viper.GetBool(pullSidecarFlag), + KeyFile: viper.GetString(keyFileFlag), + CertFile: viper.GetString(certFileFlag), + KubeConfig: viper.GetString(kubeconfigFlag), + PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag), + Stackdriver: viper.GetBool(enableStackdriverMetricsFlag), + GCPProjectID: viper.GetString(projectIDFlag), + NumWorkers: int(viper.GetInt32(numWorkersFlag)), + APIServerSustainedQPS: int(viper.GetInt32(apiServerSustainedQPSFlag)), + APIServerBurstQPS: int(viper.GetInt32(apiServerBurstQPSFlag)), + LogDir: viper.GetString(logDirFlag), + LogLevel: viper.GetString(logLevelFlag), + LogSizeLimitMB: int(viper.GetInt32(logSizeLimitMBFlag)), + StackdriverLabels: viper.GetString(stackdriverLabels), + AllocationBatchWaitTime: viper.GetDuration(allocationBatchWaitTime), } } // config stores all required configuration to create a game server controller. type config struct { - MinPort int32 - MaxPort int32 - SidecarImage string - SidecarCPURequest resource.Quantity - SidecarCPULimit resource.Quantity - SidecarMemoryRequest resource.Quantity - SidecarMemoryLimit resource.Quantity - SdkServiceAccount string - AlwaysPullSidecar bool - PrometheusMetrics bool - Stackdriver bool - StackdriverLabels string - KeyFile string - CertFile string - KubeConfig string - GCPProjectID string - NumWorkers int - APIServerSustainedQPS int - APIServerBurstQPS int - LogDir string - LogLevel string - LogSizeLimitMB int + MinPort int32 + MaxPort int32 + SidecarImage string + SidecarCPURequest resource.Quantity + SidecarCPULimit resource.Quantity + SidecarMemoryRequest resource.Quantity + SidecarMemoryLimit resource.Quantity + SdkServiceAccount string + AlwaysPullSidecar bool + PrometheusMetrics bool + Stackdriver bool + StackdriverLabels string + KeyFile string + CertFile string + KubeConfig string + GCPProjectID string + NumWorkers int + APIServerSustainedQPS int + APIServerBurstQPS int + LogDir string + LogLevel string + LogSizeLimitMB int + AllocationBatchWaitTime time.Duration } // validate ensures the ctlConfig data is valid. diff --git a/install/helm/agones/templates/controller.yaml b/install/helm/agones/templates/controller.yaml index 1adb6eadb5..43a341e81e 100644 --- a/install/helm/agones/templates/controller.yaml +++ b/install/helm/agones/templates/controller.yaml @@ -113,6 +113,8 @@ spec: value: {{ .Values.agones.controller.logLevel | quote }} - name: FEATURE_GATES value: {{ .Values.agones.featureGates | quote }} + - name: ALLOCATION_BATCH_WAIT_TIME + value: {{ .Values.agones.controller.allocationBatchWaitTime | quote }} {{- if .Values.agones.controller.persistentLogs }} - name: LOG_DIR value: "/home/agones/logs" diff --git a/install/helm/agones/templates/service/allocation.yaml b/install/helm/agones/templates/service/allocation.yaml index 48173ca648..aaa4cd63e2 100644 --- a/install/helm/agones/templates/service/allocation.yaml +++ b/install/helm/agones/templates/service/allocation.yaml @@ -241,6 +241,8 @@ spec: value: {{ .Values.agones.allocator.logLevel | quote }} - name: FEATURE_GATES value: {{ .Values.agones.featureGates | quote }} + - name: ALLOCATION_BATCH_WAIT_TIME + value: {{ .Values.agones.allocator.allocationBatchWaitTime | quote }} ports: {{- if .Values.agones.allocator.service.http.enabled }} - name: {{ .Values.agones.allocator.service.http.portName }} diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml index 18d1c679c6..d76a4b8642 100644 --- a/install/helm/agones/values.yaml +++ b/install/helm/agones/values.yaml @@ -92,6 +92,7 @@ agones: periodSeconds: 3 failureThreshold: 3 timeoutSeconds: 1 + allocationBatchWaitTime: 500ms ping: install: true resources: {} @@ -199,6 +200,7 @@ agones: disableTLS: false remoteAllocationTimeout: 10s totalRemoteAllocationTimeout: 30s + allocationBatchWaitTime: 500ms image: registry: gcr.io/agones-images tag: 1.24.0-dev diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index ef48988c5c..803602292d 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -14040,6 +14040,8 @@ spec: value: "info" - name: FEATURE_GATES value: "" + - name: ALLOCATION_BATCH_WAIT_TIME + value: "500ms" - name: LOG_DIR value: "/home/agones/logs" - name: LOG_SIZE_LIMIT_MB @@ -14264,6 +14266,8 @@ spec: value: "info" - name: FEATURE_GATES value: "" + - name: ALLOCATION_BATCH_WAIT_TIME + value: "500ms" ports: - name: https containerPort: 8443 diff --git a/pkg/gameserverallocations/allocator.go b/pkg/gameserverallocations/allocator.go index f5614241f5..9d09d56135 100644 --- a/pkg/gameserverallocations/allocator.go +++ b/pkg/gameserverallocations/allocator.go @@ -78,7 +78,6 @@ const ( allocatorPort = "443" maxBatchQueue = 100 maxBatchBeforeRefresh = 100 - batchWaitTime = 500 * time.Millisecond ) var allocationRetry = wait.Backoff{ @@ -108,6 +107,7 @@ type Allocator struct { remoteAllocationCallback func(context.Context, string, grpc.DialOption, *pb.AllocationRequest) (*pb.AllocationResponse, error) remoteAllocationTimeout time.Duration totalRemoteAllocationTimeout time.Duration + batchWaitTime time.Duration } // request is an async request for allocation @@ -125,7 +125,7 @@ type response struct { // NewAllocator creates an instance of Allocator func NewAllocator(policyInformer multiclusterinformerv1.GameServerAllocationPolicyInformer, secretInformer informercorev1.SecretInformer, gameServerGetter getterv1.GameServersGetter, - kubeClient kubernetes.Interface, allocationCache *AllocationCache, remoteAllocationTimeout time.Duration, totalRemoteAllocationTimeout time.Duration) *Allocator { + kubeClient kubernetes.Interface, allocationCache *AllocationCache, remoteAllocationTimeout time.Duration, totalRemoteAllocationTimeout time.Duration, batchWaitTime time.Duration) *Allocator { ah := &Allocator{ pendingRequests: make(chan request, maxBatchQueue), allocationPolicyLister: policyInformer.Lister(), @@ -134,6 +134,7 @@ func NewAllocator(policyInformer multiclusterinformerv1.GameServerAllocationPoli secretSynced: secretInformer.Informer().HasSynced, gameServerGetter: gameServerGetter, allocationCache: allocationCache, + batchWaitTime: batchWaitTime, remoteAllocationTimeout: remoteAllocationTimeout, totalRemoteAllocationTimeout: totalRemoteAllocationTimeout, remoteAllocationCallback: func(ctx context.Context, endpoint string, dialOpts grpc.DialOption, request *pb.AllocationRequest) (*pb.AllocationResponse, error) { @@ -531,7 +532,7 @@ func (c *Allocator) ListenAndAllocate(ctx context.Context, updateWorkerCount int list = nil requestCount = 0 // slow down cpu churn, and allow items to batch - time.Sleep(batchWaitTime) + time.Sleep(c.batchWaitTime) } } } diff --git a/pkg/gameserverallocations/allocator_test.go b/pkg/gameserverallocations/allocator_test.go index 718e9309c2..0f4a9ae3f8 100644 --- a/pkg/gameserverallocations/allocator_test.go +++ b/pkg/gameserverallocations/allocator_test.go @@ -242,7 +242,7 @@ func TestAllocatorApplyAllocationToGameServer(t *testing.T) { m.KubeInformerFactory.Core().V1().Secrets(), m.AgonesClient.AgonesV1(), m.KubeClient, NewAllocationCache(m.AgonesInformerFactory.Agones().V1().GameServers(), gameservers.NewPerNodeCounter(m.KubeInformerFactory, m.AgonesInformerFactory), healthcheck.NewHandler()), - time.Second, 5*time.Second, + time.Second, 5*time.Second, 500*time.Millisecond, ) gs, err := allocator.applyAllocationToGameServer(ctx, allocationv1.MetaPatch{}, &agonesv1.GameServer{}) @@ -281,7 +281,7 @@ func TestAllocationApplyAllocationError(t *testing.T) { m.KubeInformerFactory.Core().V1().Secrets(), m.AgonesClient.AgonesV1(), m.KubeClient, NewAllocationCache(m.AgonesInformerFactory.Agones().V1().GameServers(), gameservers.NewPerNodeCounter(m.KubeInformerFactory, m.AgonesInformerFactory), healthcheck.NewHandler()), - time.Second, 5*time.Second, + time.Second, 5*time.Second, 500*time.Millisecond, ) gsa, err := allocator.applyAllocationToGameServer(ctx, allocationv1.MetaPatch{}, &agonesv1.GameServer{}) @@ -686,7 +686,8 @@ func newFakeAllocator() (*Allocator, agtesting.Mocks) { m.KubeClient, NewAllocationCache(m.AgonesInformerFactory.Agones().V1().GameServers(), counter, healthcheck.NewHandler()), time.Second, - 5*time.Second) + 5*time.Second, + 500*time.Millisecond) a.recorder = m.FakeRecorder return a, m diff --git a/pkg/gameserverallocations/controller.go b/pkg/gameserverallocations/controller.go index 0995ce0358..f8601f9113 100644 --- a/pkg/gameserverallocations/controller.go +++ b/pkg/gameserverallocations/controller.go @@ -61,6 +61,7 @@ func NewController(apiServer *apiserver.APIServer, agonesInformerFactory externalversions.SharedInformerFactory, remoteAllocationTimeout time.Duration, totalAllocationTimeout time.Duration, + allocationBatchWaitTime time.Duration, ) *Controller { c := &Controller{ api: apiServer, @@ -71,7 +72,8 @@ func NewController(apiServer *apiserver.APIServer, kubeClient, NewAllocationCache(agonesInformerFactory.Agones().V1().GameServers(), counter, health), remoteAllocationTimeout, - totalAllocationTimeout), + totalAllocationTimeout, + allocationBatchWaitTime), } c.baseLogger = runtime.NewLoggerWithType(c) diff --git a/pkg/gameserverallocations/controller_test.go b/pkg/gameserverallocations/controller_test.go index 403d44e80a..29366c9a9b 100644 --- a/pkg/gameserverallocations/controller_test.go +++ b/pkg/gameserverallocations/controller_test.go @@ -845,7 +845,7 @@ func newFakeControllerWithTimeout(remoteAllocationTimeout time.Duration, totalRe m.Mux = http.NewServeMux() counter := gameservers.NewPerNodeCounter(m.KubeInformerFactory, m.AgonesInformerFactory) api := apiserver.NewAPIServer(m.Mux) - c := NewController(api, healthcheck.NewHandler(), counter, m.KubeClient, m.KubeInformerFactory, m.AgonesClient, m.AgonesInformerFactory, remoteAllocationTimeout, totalRemoteAllocationTimeout) + c := NewController(api, healthcheck.NewHandler(), counter, m.KubeClient, m.KubeInformerFactory, m.AgonesClient, m.AgonesInformerFactory, remoteAllocationTimeout, totalRemoteAllocationTimeout, 500*time.Millisecond) c.recorder = m.FakeRecorder c.allocator.recorder = m.FakeRecorder return c, m diff --git a/site/content/en/docs/Installation/Install Agones/helm.md b/site/content/en/docs/Installation/Install Agones/helm.md index c7ef947d59..fa4a1143ca 100644 --- a/site/content/en/docs/Installation/Install Agones/helm.md +++ b/site/content/en/docs/Installation/Install Agones/helm.md @@ -206,17 +206,18 @@ The following tables lists the configurable parameters of the Agones chart and t {{% feature publishVersion="1.24.0" %}} **New Configuration Features:** -| Parameter | Description | Default | -| --------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ---------------------- | -| | | | -| `agones.controller.disableSecret` | Disables the creation of any allocator secrets. If true, you MUST provide the `{agones.releaseName}-cert` secrets before installation. | `false` | -| `agones.controller.customCertSecretPath` | Remap cert-manager path to server.crt and server.key | `{}` | -| `agones.controller.allocationApiService.annotations` | [Annotations][annotations] added to the Agones apiregistration | `{}` | -| `agones.controller.allocationApiService.disableCaBundle` | Disable ca-bundle so it can be injected by cert-manager | `false` | -| `agones.controller.validatingWebhook.annotations` | [Annotations][annotations] added to the Agones validating webhook | `{}` | -| `agones.controller.validatingWebhook.disableCaBundle` | Disable ca-bundle so it can be injected by cert-manager | `false` | -| `agones.controller.mutatingWebhook.annotations` | [Annotations][annotations] added to the Agones mutating webhook | `{}` | -| `agones.controller.mutatingWebhook.disableCaBundle` | Disable ca-bundle so it can be injected by cert-manager | `false` | +| Parameter | Description | Default | +|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|---------| +| `agones.controller.disableSecret` | Disables the creation of any allocator secrets. If true, you MUST provide the `{agones.releaseName}-cert` secrets before installation. | `false` | +| `agones.controller.customCertSecretPath` | Remap cert-manager path to server.crt and server.key | `{}` | +| `agones.controller.allocationApiService.annotations` | [Annotations][annotations] added to the Agones apiregistration | `{}` | +| `agones.controller.allocationApiService.disableCaBundle` | Disable ca-bundle so it can be injected by cert-manager | `false` | +| `agones.controller.validatingWebhook.annotations` | [Annotations][annotations] added to the Agones validating webhook | `{}` | +| `agones.controller.validatingWebhook.disableCaBundle` | Disable ca-bundle so it can be injected by cert-manager | `false` | +| `agones.controller.mutatingWebhook.annotations` | [Annotations][annotations] added to the Agones mutating webhook | `{}` | +| `agones.controller.mutatingWebhook.disableCaBundle` | Disable ca-bundle so it can be injected by cert-manager | `false` | +| `agones.controller.allocationBatchWaitTime` | Wait time between each allocation batch when performing allocations in controller mode | `500ms` | +| `agones.allocator.allocationBatchWaitTime` | Wait time between each allocation batch when performing allocations in allocator mode | `500ms` | {{% /feature %}} [toleration]: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/