Skip to content

Commit

Permalink
GPUs != Neuron Devices
Browse files Browse the repository at this point in the history
Signed-off-by: Davanum Srinivas <davanum@gmail.com>
  • Loading branch information
dims committed Feb 24, 2025
1 parent 270bd7a commit f96b8d7
Show file tree
Hide file tree
Showing 16 changed files with 42 additions and 17 deletions.
5 changes: 2 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ go 1.24.0

require (
github.com/Masterminds/semver/v3 v3.3.1
github.com/aws/amazon-ec2-instance-selector/v2 v2.4.2-0.20231216170552-14d4dfcbaadf
github.com/aws/amazon-ec2-instance-selector/v3 v3.1.1-0.20250224180552-36eea73b44c2
github.com/aws/aws-sdk-go v1.55.6
github.com/aws/aws-sdk-go-v2 v1.36.2
github.com/aws/aws-sdk-go-v2/config v1.29.7
Expand Down Expand Up @@ -253,7 +253,6 @@ require (
github.com/hexops/gotextdiff v1.0.3 // indirect
github.com/huandu/xstrings v1.5.0 // indirect
github.com/iancoleman/strcase v0.3.0 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jessevdk/go-flags v1.6.1 // indirect
github.com/jgautheron/goconst v1.7.1 // indirect
Expand Down Expand Up @@ -311,7 +310,7 @@ require (
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/nakabonne/nestif v0.3.1 // indirect
Expand Down
12 changes: 8 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,10 @@ github.com/ashanbrown/makezero v1.2.0 h1:/2Lp1bypdmK9wDIq7uWBlDF1iMUpIIS4A+pF6C9
github.com/ashanbrown/makezero v1.2.0/go.mod h1:dxlPhHbDMC6N6xICzFBSK+4njQDdK8euNO0qjQMtGY4=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aws/amazon-ec2-instance-selector/v2 v2.4.2-0.20231216170552-14d4dfcbaadf h1:1zems5/6/Fs+1dFsjTZ+oSogVHkfGl1VWuttRXYGx+0=
github.com/aws/amazon-ec2-instance-selector/v2 v2.4.2-0.20231216170552-14d4dfcbaadf/go.mod h1:zsxolOKwtNEvoOPScJy5+Bu8F72LZy7pqVJNhP8tqVE=
github.com/aws/amazon-ec2-instance-selector/v3 v3.1.0 h1:NtSErNyyzyMzV3RXD3HGMTYUHD+XcaHbAMQHFaoU5Y4=
github.com/aws/amazon-ec2-instance-selector/v3 v3.1.0/go.mod h1:S8Yga4m3aMYvvCDWE4DA72hywLmvY/yknG45QiW0l/M=
github.com/aws/amazon-ec2-instance-selector/v3 v3.1.1-0.20250224180552-36eea73b44c2 h1:vy7b8q6Cwn3j3HgzRBT7N99POtT1g6SuXlID9CI1yp8=
github.com/aws/amazon-ec2-instance-selector/v3 v3.1.1-0.20250224180552-36eea73b44c2/go.mod h1:RU/lVVsYHNN7Bwr2UmCw5z2aWPcNIHADY49bj082oYM=
github.com/aws/aws-sdk-go v1.55.6 h1:cSg4pvZ3m8dgYcgqB97MrcdjUmZ1BeMYKUxMMB89IPk=
github.com/aws/aws-sdk-go v1.55.6/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
github.com/aws/aws-sdk-go-v2 v1.36.2 h1:Ub6I4lq/71+tPb/atswvToaLGVMxKZvjYDVOWEExOcU=
Expand Down Expand Up @@ -555,8 +557,6 @@ github.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20240312041847-bd984b5ce465/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw=
github.com/imdario/mergo v0.3.9/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jessevdk/go-flags v1.6.1 h1:Cvu5U8UGrLay1rZfv/zP7iLpSHGUZ/Ou68T0iX1bBK4=
Expand Down Expand Up @@ -728,6 +728,8 @@ github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo=
github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
Expand Down Expand Up @@ -907,6 +909,8 @@ github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6g
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA=
github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc=
github.com/samber/lo v1.47.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU=
github.com/sanathkr/go-yaml v0.0.0-20170819195128-ed9d249f429b h1:jUK33OXuZP/l6babJtnLo1qsGvq6G9so9KMflGAm4YA=
github.com/sanathkr/go-yaml v0.0.0-20170819195128-ed9d249f429b/go.mod h1:8458kAagoME2+LN5//WxE71ysZ3B7r22fdgb7qVmXSY=
github.com/sanathkr/yaml v0.0.0-20170819201035-0056894fa522 h1:fOCp11H0yuyAt2wqlbJtbyPzSgaxHTv8uN1pMpkG1t8=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ var _ = Describe("(Integration) [Instance Selector test]", func() {
}, "--instance-selector-vcpus=8",
"--instance-selector-memory=32",
"--instance-selector-gpus=0",
"--instance-selector-neuron-devices=0",
),
Entry("with vCPUs and memory", nil,
"--instance-selector-vcpus=8",
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/eksctl.io/v1alpha5/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1999,6 +1999,9 @@ type InstanceSelector struct {
// GPUs specifies the number of GPUs.
// It can be set to 0 to select non-GPU instance types.
GPUs *int `json:"gpus,omitempty"`
// NeuronDevices specifies the number of Neuron device Accelerators.
// It can be set to 0 to select non-Accelerator instance types.
NeuronDevices *int32 `json:"neuron_devices,omitempty"`
// CPU Architecture of the EC2 instance type.
// Valid variants are:
// `"x86_64"`
Expand Down
8 changes: 8 additions & 0 deletions pkg/apis/eksctl.io/v1alpha5/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ var (
GPUDriversWarning = func(amiFamily string) string {
return fmt.Sprintf("%s does not ship with NVIDIA GPU drivers installed, hence won't support running GPU-accelerated workloads out of the box", amiFamily)
}

NeuronDeviceDriversWarning = func(amiFamily string) string {
return fmt.Sprintf("%s does not ship with Neuron Devices drivers installed, hence won't support running inference-accelerated workloads out of the box", amiFamily)
}
)

var (
Expand Down Expand Up @@ -736,6 +740,10 @@ func validateNodeGroupBase(np NodePool, path string, controlPlaneOnOutposts bool
(ng.InstanceSelector.GPUs == nil || *ng.InstanceSelector.GPUs != 0) {
logger.Warning("instance selector may/will select GPU instance types, " + GPUDriversWarning(ng.AMIFamily))
}
if ng.InstanceSelector != nil && !ng.InstanceSelector.IsZero() &&
(ng.InstanceSelector.NeuronDevices == nil || *ng.InstanceSelector.NeuronDevices != 0) {
logger.Warning("instance selector may/will select Neuron Device instance types, " + NeuronDeviceDriversWarning(ng.AMIFamily))
}
}

if ng.AMIFamily != NodeImageFamilyAmazonLinux2 &&
Expand Down
3 changes: 3 additions & 0 deletions pkg/ctl/cmdutils/configfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,9 @@ func normalizeBaseNodeGroup(np api.NodePool, cmd *cobra.Command) {
if !flags.Changed("instance-selector-gpus") {
ng.InstanceSelector.GPUs = nil
}
if !flags.Changed("instance-selector-neuron-devices") {
ng.InstanceSelector.NeuronDevices = nil
}
if !flags.Changed("enable-ssm") {
ng.SSH.EnableSSM = nil
}
Expand Down
1 change: 1 addition & 0 deletions pkg/ctl/cmdutils/nodegroup_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ func AddInstanceSelectorOptions(flagSetGroup *NamedFlagSetGroup, ng *api.NodeGro
fs.StringVar(&ng.InstanceSelector.Memory, "instance-selector-memory", "", "4 or 4GiB")
fs.StringVar(&ng.InstanceSelector.CPUArchitecture, "instance-selector-cpu-architecture", "", "x86_64, or arm64")
ng.InstanceSelector.GPUs = fs.Int("instance-selector-gpus", 0, "an integer value")
ng.InstanceSelector.NeuronDevices = fs.Int32("instance-selector-neuron-devices", 0, "an integer value")
})
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/ctl/create/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
kubeclient "k8s.io/client-go/kubernetes"
clientcmdlatest "k8s.io/client-go/tools/clientcmd/api/latest"

"github.com/aws/amazon-ec2-instance-selector/v2/pkg/selector"
"github.com/aws/amazon-ec2-instance-selector/v3/pkg/selector"
"github.com/weaveworks/eksctl/pkg/accessentry"
accessentryactions "github.com/weaveworks/eksctl/pkg/actions/accessentry"
"github.com/weaveworks/eksctl/pkg/actions/addon"
Expand Down
2 changes: 1 addition & 1 deletion pkg/ctl/create/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"fmt"
"io"

"github.com/aws/amazon-ec2-instance-selector/v2/pkg/selector"
"github.com/aws/amazon-ec2-instance-selector/v3/pkg/selector"

"github.com/kris-nova/logger"
"github.com/pkg/errors"
Expand Down
2 changes: 1 addition & 1 deletion pkg/ctl/get/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"strconv"
"time"

"github.com/aws/amazon-ec2-instance-selector/v2/pkg/selector"
"github.com/aws/amazon-ec2-instance-selector/v3/pkg/selector"
"github.com/kris-nova/logger"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
Expand Down
2 changes: 1 addition & 1 deletion pkg/ctl/scale/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package scale
import (
"context"

"github.com/aws/amazon-ec2-instance-selector/v2/pkg/selector"
"github.com/aws/amazon-ec2-instance-selector/v3/pkg/selector"

"github.com/spf13/cobra"
"github.com/spf13/pflag"
Expand Down
2 changes: 1 addition & 1 deletion pkg/ctl/update/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package update
import (
"context"

"github.com/aws/amazon-ec2-instance-selector/v2/pkg/selector"
"github.com/aws/amazon-ec2-instance-selector/v3/pkg/selector"

"github.com/lithammer/dedent"
"github.com/spf13/cobra"
Expand Down
2 changes: 1 addition & 1 deletion pkg/ctl/upgrade/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"context"
"time"

"github.com/aws/amazon-ec2-instance-selector/v2/pkg/selector"
"github.com/aws/amazon-ec2-instance-selector/v3/pkg/selector"

"github.com/spf13/cobra"
"github.com/spf13/pflag"
Expand Down
2 changes: 1 addition & 1 deletion pkg/eks/fakes/fake_instance_selector.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions pkg/eks/nodegroup_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
"regexp"
"strings"

"github.com/aws/amazon-ec2-instance-selector/v2/pkg/bytequantity"
"github.com/aws/amazon-ec2-instance-selector/v2/pkg/selector"
"github.com/aws/amazon-ec2-instance-selector/v3/pkg/bytequantity"
"github.com/aws/amazon-ec2-instance-selector/v3/pkg/selector"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/kris-nova/logger"
"github.com/pkg/errors"
Expand Down Expand Up @@ -203,6 +203,12 @@ func (n *NodeGroupService) expandInstanceSelector(ins *api.InstanceSelector, azs
if ins.GPUs != nil {
filters.GpusRange = makeRange(*ins.GPUs)
}
if ins.NeuronDevices != nil {
filters.InferenceAcceleratorsRange = &selector.Int32RangeFilter{
LowerBound: *ins.NeuronDevices,
UpperBound: *ins.NeuronDevices,
}
}
cpuArch := ins.CPUArchitecture
if cpuArch == "" {
cpuArch = defaultCPUArch
Expand Down
2 changes: 1 addition & 1 deletion userdocs/src/usage/instance-selector.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ $ eksctl create cluster -f instance-selector-cluster.yaml

The following instance selector CLI options are supported by `eksctl create cluster` and `eksctl create nodegroup`:

`--instance-selector-vcpus`, `--instance-selector-memory`, `--instance-selector-gpus` and `instance-selector-cpu-architecture`
`--instance-selector-vcpus`, `--instance-selector-memory`, `--instance-selector-gpus`, `--instance-selector-neuron-devices` and `instance-selector-cpu-architecture`

???+ note
By default, GPU instance types are not filtered out. If you wish to do so (e.g. for cost effectiveness, when your applications don't particularly benefit from GPU-accelerated workloads), please explicitly set `gpus: 0` (via config file) or `--instance-selector-gpus=0` (via CLI flag).
Expand Down

0 comments on commit f96b8d7

Please sign in to comment.