diff --git a/pkg/epp/scheduling/config.go b/pkg/epp/scheduling/config.go index 4ed109af..4d1c1c4c 100644 --- a/pkg/epp/scheduling/config.go +++ b/pkg/epp/scheduling/config.go @@ -16,7 +16,10 @@ limitations under the License. package scheduling -import "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins" +import ( + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorers" +) type SchedulerConfig struct { preSchedulePlugins []plugins.PreSchedule @@ -32,10 +35,18 @@ var defPlugin = &defaultPlugin{} // it's possible to call NewSchedulerWithConfig to pass a different argument. // For build time plugins changes, it's recommended to change the defaultConfig variable in this file. -var defaultConfig = &SchedulerConfig{ - preSchedulePlugins: []plugins.PreSchedule{}, - filters: []plugins.Filter{defPlugin}, - scorers: map[plugins.Scorer]int{}, - picker: defPlugin, - postSchedulePlugins: []plugins.PostSchedule{}, +var defaultConfig = createDefaultConfig() + +func createDefaultConfig() *SchedulerConfig { + defConfig := &SchedulerConfig{ + preSchedulePlugins: []plugins.PreSchedule{}, + filters: []plugins.Filter{defPlugin}, + scorers: map[plugins.Scorer]int{}, + picker: defPlugin, + postSchedulePlugins: []plugins.PostSchedule{}, + } + + defConfig.scorers[scorers.NewLoadBasedScorer()] = 1.0 + + return defConfig } diff --git a/pkg/epp/scheduling/plugins/scorers/load_based_scorer.go b/pkg/epp/scheduling/plugins/scorers/load_based_scorer.go new file mode 100644 index 00000000..418375ef --- /dev/null +++ b/pkg/epp/scheduling/plugins/scorers/load_based_scorer.go @@ -0,0 +1,55 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package scorers + +import ( + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" +) + +type LoadBasedScorer struct{} + +func NewLoadBasedScorer() plugins.Scorer { + return LoadBasedScorer{} +} + +func (s LoadBasedScorer) Name() string { + return "load based scorer" +} + +// Score scores the given pod in range of 0-1 +// Currently metrics contains number of requests waiting in the queue, there is no information about number of requests +// that can be processed in the given pod immediately. +// Pod with empty waiting requests queue is scored with 0.5 +// Pod with requests in the queue will get score between 0.5 and 0. +// Score 0 will get pod with number of requests in the queue equal to the threshold used in load-based filter (QueueingThresholdLoRA) +// In future pods with additional capacity will get score higher than 0.5 +func (s LoadBasedScorer) Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 { + scoredPods := make(map[types.Pod]float64) + + for _, pod := range pods { + waitingRequests := float64(pod.GetMetrics().WaitingQueueSize) + + if waitingRequests == 0 { + scoredPods[pod] = 0.5 + } else { + scoredPods[pod] = 0.5 * (1.0 - (waitingRequests / float64(config.Conf.QueueingThresholdLoRA))) + } + } + + return scoredPods +}