From ff774e6e62a652d4473e2398110ff796aa1e420b Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Sun, 12 May 2024 14:36:06 +0000 Subject: [PATCH] network device injector plugin The network device injector plugin allows injecting network interfaces that are present in the host into the Pods. The network interface can be renamed and a network configuration can be passed. It is important to differentiate between network interface injection and CNI, as today, CNI is used for container runtimes to provide the network configuration, it also performs the creation and the configuration of the interfaces that are injected into the Pod namespace and provides as a result some properties like the assigned IPs that are consumed later by the upstream projects like Kubernetes. Signed-off-by: Antonio Ojea --- README.md | 1 + plugins/network-device-injector/README.md | 97 ++++++ plugins/network-device-injector/go.mod | 28 ++ plugins/network-device-injector/go.sum | 104 ++++++ .../network-device-injector/host-device.go | 223 +++++++++++++ .../network-device-injector.go | 306 ++++++++++++++++++ .../sample-network-device-inject.yaml | 50 +++ 7 files changed, 809 insertions(+) create mode 100644 plugins/network-device-injector/README.md create mode 100644 plugins/network-device-injector/go.mod create mode 100644 plugins/network-device-injector/go.sum create mode 100644 plugins/network-device-injector/host-device.go create mode 100644 plugins/network-device-injector/network-device-injector.go create mode 100644 plugins/network-device-injector/sample-network-device-inject.yaml diff --git a/README.md b/README.md index f0987098..9fccfb29 100644 --- a/README.md +++ b/README.md @@ -308,6 +308,7 @@ The following sample plugins exist for NRI: - [logger](plugins/logger) - [differ](plugins/differ) - [device injector](plugins/device-injector) + - [network device injector](plugins/network-device-injector) - [OCI hook injector](plugins/hook-injector) - [ulimit adjuster](plugins/ulimit-adjuster) - [NRI
v0.1.0 plugin adapter](plugins/v010-adapter) diff --git a/plugins/network-device-injector/README.md b/plugins/network-device-injector/README.md new file mode 100644 index 00000000..5f635411 --- /dev/null +++ b/plugins/network-device-injector/README.md @@ -0,0 +1,97 @@ +## Network Device Injector Plugin + +This sample plugin can inject existing network devices into containers using pod annotations. +Network devices are network namespaced, this implies that in Kubernetes they are Pod scoped +and not container scoped; all containers are able to access the network device inside the Pod. + +Traditionally in Kubernetes the CNI plugin is responsible for configuring the default network +interface for Pods, but there are use cases where the Pod may need to use additional network interfaces. +A more detailed explanation of all the possible technologies to add interfaces to Pods was presented during +[SIG Network meeting 14/03/2024](https://www.youtube.com/watch?v=67UzeMEaqnM&list=PL69nYSiGNLP2E8vmnqo5MwPOY25sDWIxb&index=1), +[slides](https://docs.google.com/presentation/d/1pjDCtpdbCSWaqCbBYWgzTxAewOVbMf6rUS5SbjAJAe8/edit?usp=sharing). + +The Kubernetes project is working to [provide a better API](https://docs.google.com/document/d/1VBBj8Fh0ks0_-dacpqx6kD2tlIvj0XfFxtMuSfOJ22w/edit) +introducing network device claims that would naturally provide a built in means to inject. + +[Network Devices may be included in the OCI Runtime Specification](https://github.com/opencontainers/runtime-spec/issues/1239), this will allow +implementations to be more declarative offloading the low level implementation details to the runtime implementation. + +Pods that run in the host network namespace cannot inject any network device as those are already running on the same network namespace, +and any modification can impact the existing system networking. + +### Network Device Annotations + +Network devices are annotated using the `netdevices.nri.containerd.io` annotation key prefix.
+Network devices are defined at the Pod level, since they are part of the network namespace. + +The annotation syntax for network device injection is + +``` +- name: enp2s2f0 + new_name: eth1 + address: 192.168.2.2 + prefix: 24 + mtu: 1500 +- name: enp2s2f1 + ... +``` + +The parameters are based on the existing linux netdevice representation. +https://man7.org/linux/man-pages/man7/netdevice.7.html + +`name` is mandatory and refers to the name of the network interface in the host, +the rest of the parameters are optional. +`new_name` is the name of the interface inside the Pod. + +The plugin only injects interfaces on the Pod network namespace for which the containers are attached when created, +for more advanced networking configuration like routing, traffic redirection or dynamic address configuration new plugins can be created. + +## Testing + +You can test this plugin using a kubernetes cluster/node with a container +runtime that has NRI support enabled. Start the plugin on the target node +(`network-device-injector -idx 10`), create a pod with some annotated network devices, +then verify that those get injected into the pod network namespace according +to the annotations. + +On the same node where the plugin is running create a dummy interface: + +``` +ip link add dummy0 type dummy +``` + +You can validate the interface state with the following command + +``` +$ ip link show dev dummy0 +81: dummy0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 + link/ether fa:57:1c:81:0b:98 brd ff:ff:ff:ff:ff:ff +``` + +See the [sample pod spec](sample-network-device-inject.yaml) for an example. + +Once the Pod is running you'll be able to check that the `dummy0` interface is no longer +present in the node, and is now inside the Pod with the new name and network configuration +passed on the annotation. + +``` +kubectl exec -it bbdev0 ip a +kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version.
Use kubectl exec [POD] -- [COMMAND] instead. +Defaulted container "c0" out of: c0, c1 +1: lo: mtu 65536 qdisc noqueue qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + inet 127.0.0.1/8 scope host lo + valid_lft forever preferred_lft forever + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever +2: eth0@if80: mtu 1500 qdisc noqueue + link/ether de:1d:9b:0f:83:b3 brd ff:ff:ff:ff:ff:ff + inet 10.244.1.76/24 brd 10.244.1.255 scope global eth0 + valid_lft forever preferred_lft forever + inet6 fe80::dc1d:9bff:fe0f:83b3/64 scope link + valid_lft forever preferred_lft forever +79: eth33: mtu 1500 qdisc noop qlen 1000 + link/ether 3a:74:86:94:75:6b brd ff:ff:ff:ff:ff:ff + inet 192.168.2.2/24 brd 192.168.2.255 scope global eth33 + valid_lft forever preferred_lft forever +``` diff --git a/plugins/network-device-injector/go.mod b/plugins/network-device-injector/go.mod new file mode 100644 index 00000000..f6df97b7 --- /dev/null +++ b/plugins/network-device-injector/go.mod @@ -0,0 +1,28 @@ +module github.com/containerd/nri/plugins/network-device-injector + +go 1.22.0 + +require ( + github.com/containerd/nri v0.2.0 + github.com/containernetworking/plugins v1.4.1 + github.com/sirupsen/logrus v1.9.3 + github.com/vishvananda/netlink v1.2.1-beta.2 + sigs.k8s.io/yaml v1.4.0 +) + +require ( + github.com/containerd/ttrpc v1.2.3 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/opencontainers/runtime-spec v1.2.0 // indirect + github.com/vishvananda/netns v0.0.4 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240509183442-62759503f434 // indirect + google.golang.org/grpc v1.63.2 // indirect + google.golang.org/protobuf v1.34.1 // indirect + k8s.io/cri-api v0.30.0 // indirect +) + +replace github.com/containerd/nri => ../.. 
diff --git a/plugins/network-device-injector/go.sum b/plugins/network-device-injector/go.sum new file mode 100644 index 00000000..8fea2e80 --- /dev/null +++ b/plugins/network-device-injector/go.sum @@ -0,0 +1,104 @@ +github.com/containerd/ttrpc v1.2.3 h1:4jlhbXIGvijRtNC8F/5CpuJZ7yKOBFGFOOXg1bkISz0= +github.com/containerd/ttrpc v1.2.3/go.mod h1:ieWsXucbb8Mj9PH0rXCw1i8IunRbbAiDkpXkbfflWBM= +github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ= +github.com/containernetworking/cni v1.1.2/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw= +github.com/containernetworking/plugins v1.4.1 h1:+sJRRv8PKhLkXIl6tH1D7RMi+CbbHutDGU+ErLBORWA= +github.com/containernetworking/plugins v1.4.1/go.mod h1:n6FFGKcaY4o2o5msgu/UImtoC+fpQXM3076VHfHbj60= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/pprof 
v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk= +github.com/google/pprof v0.0.0-20230323073829-e72429f035bd/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/onsi/ginkgo/v2 v2.16.0 h1:7q1w9frJDzninhXxjZd+Y/x54XNjG/UlRLIYPZafsPM= +github.com/onsi/ginkgo/v2 v2.16.0/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= +github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= +github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= +github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= +github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4= +github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs= +github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= +github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= +github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net 
v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.17.0 
h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240509183442-62759503f434 h1:umK/Ey0QEzurTNlsV3R+MfxHAb78HCEX/IkuR+zH4WQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240509183442-62759503f434/go.mod h1:I7Y+G38R2bu5j1aLzfFmQfTcU/WnFuqDwLZAbvKTKpM= +google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= +google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/cri-api v0.30.0 h1:hZqh3vH5JZdqeAyhD9nPXSbT6GDgrtPJkPiIzhWKVhk= +k8s.io/cri-api v0.30.0/go.mod h1://4/umPJSW1ISNSNng4OwjpkvswJOQwU8rnkvO8P+xg= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= 
+sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/plugins/network-device-injector/host-device.go b/plugins/network-device-injector/host-device.go
new file mode 100644
index 00000000..2cb3bd35
--- /dev/null
+++ b/plugins/network-device-injector/host-device.go
@@ -0,0 +1,231 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+// Copyright 2015 CNI authors
+// Copied from https://github.com/containernetworking/plugins/blob/9f1bf2a84828d2c16ea5912b53c0b6048bd00e7a/plugins/main/host-device/host-device.go on 2024-05-23
+
+package main
+
+import (
+	"fmt"
+	"net"
+
+	"github.com/containernetworking/plugins/pkg/ns"
+	"github.com/vishvananda/netlink"
+)
+
+// setTempName renames dev to "temp_<ifindex>" to avoid collisions with interface names and returns the refreshed link.
+func setTempName(dev netlink.Link) (netlink.Link, error) {
+	tempName := fmt.Sprintf("%s%d", "temp_", dev.Attrs().Index)
+
+	// rename to tempName
+	if err := netlink.LinkSetName(dev, tempName); err != nil {
+		return nil, fmt.Errorf("failed to rename device %q to %q: %v", dev.Attrs().Name, tempName, err)
+	}
+
+	// Get updated Link obj
+	tempDev, err := netlink.LinkByName(tempName)
+	if err != nil {
+		return nil, fmt.Errorf("failed to find %q after rename to %q: %v", dev.Attrs().Name, tempName, err)
+	}
+
+	return tempDev, nil
+}
+
+// moveLinkIn moves hostDev into containerNs and renames it to ifName. The
+// host-side name is stored in the link alias so that moveLinkOut can restore
+// it later. Each completed step is rolled back (best effort) on failure.
+// It returns the link as seen from inside containerNs.
+func moveLinkIn(hostDev netlink.Link, containerNs ns.NetNS, ifName string) (netlink.Link, error) {
+	origLinkFlags := hostDev.Attrs().Flags
+	hostDevName := hostDev.Attrs().Name
+	defaultNs, err := ns.GetCurrentNS()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get host namespace: %v", err)
+	}
+	defer defaultNs.Close() // do not leak one namespace fd per injected device
+
+	// Devices can be renamed only when down
+	if err = netlink.LinkSetDown(hostDev); err != nil {
+		return nil, fmt.Errorf("failed to set %q down: %v", hostDev.Attrs().Name, err)
+	}
+
+	// restore original link state in case of error
+	defer func() {
+		if err != nil {
+			if origLinkFlags&net.FlagUp == net.FlagUp && hostDev != nil {
+				_ = netlink.LinkSetUp(hostDev)
+			}
+		}
+	}()
+
+	hostDev, err = setTempName(hostDev)
+	if err != nil {
+		return nil, fmt.Errorf("failed to rename device %q to temporary name: %v", hostDevName, err)
+	}
+
+	// restore original netdev name in case of error
+	defer func() {
+		if err != nil && hostDev != nil {
+			_ = netlink.LinkSetName(hostDev, hostDevName)
+		}
+	}()
+
+	if err = netlink.LinkSetNsFd(hostDev, int(containerNs.Fd())); err != nil {
+		return nil, fmt.Errorf("failed to move %q to container ns: %v", hostDev.Attrs().Name, err)
+	}
+
+	var contDev netlink.Link
+	tempDevName := hostDev.Attrs().Name
+	if err = containerNs.Do(func(_ ns.NetNS) error {
+		var err error
+		contDev, err = netlink.LinkByName(tempDevName)
+		if err != nil {
+			return fmt.Errorf("failed to find %q: %v", tempDevName, err)
+		}
+
+		// move netdev back to host namespace in case of error
+		defer func() {
+			if err != nil {
+				_ = netlink.LinkSetNsFd(contDev, int(defaultNs.Fd()))
+				// we need to get updated link object as link was moved back to host namespace
+				_ = defaultNs.Do(func(_ ns.NetNS) error {
+					hostDev, _ = netlink.LinkByName(tempDevName)
+					return nil
+				})
+			}
+		}()
+
+		// Save host device name into the container device's alias property
+		if err = netlink.LinkSetAlias(contDev, hostDevName); err != nil {
+			return fmt.Errorf("failed to set alias to %q: %v", tempDevName, err)
+		}
+		// Rename container device to respect args.IfName
+		if err = netlink.LinkSetName(contDev, ifName); err != nil {
+			return fmt.Errorf("failed to rename device %q to %q: %v", tempDevName, ifName, err)
+		}
+
+		// restore tempDevName in case of error
+		defer func() {
+			if err != nil {
+				_ = netlink.LinkSetName(contDev, tempDevName)
+			}
+		}()
+
+		// Bring container device up
+		if err = netlink.LinkSetUp(contDev); err != nil {
+			return fmt.Errorf("failed to set %q up: %v", ifName, err)
+		}
+
+		// bring device down in case of error
+		defer func() {
+			if err != nil {
+				_ = netlink.LinkSetDown(contDev)
+			}
+		}()
+
+		// Retrieve link again to get up-to-date name and attributes
+		contDev, err = netlink.LinkByName(ifName)
+		if err != nil {
+			return fmt.Errorf("failed to find %q: %v", ifName, err)
+		}
+		return nil
+	}); err != nil {
+		return nil, err
+	}
+
+	return contDev, nil
+}
+
+// moveLinkOut moves the interface named ifName from containerNs back to the
+// current namespace and renames it to the name stored in its alias. The
+// interface is set down before the move and is not brought up again.
+func moveLinkOut(containerNs ns.NetNS, ifName string) error {
+	defaultNs, err := ns.GetCurrentNS()
+	if err != nil {
+		return err
+	}
+	defer defaultNs.Close()
+
+	var tempName string
+	var origDev netlink.Link
+	err = containerNs.Do(func(_ ns.NetNS) error {
+		dev, err := netlink.LinkByName(ifName)
+		if err != nil {
+			return fmt.Errorf("failed to find %q: %v", ifName, err)
+		}
+		origDev = dev
+
+		// Devices can be renamed only when down
+		if err = netlink.LinkSetDown(dev); err != nil {
+			return fmt.Errorf("failed to set %q down: %v", ifName, err)
+		}
+
+		defer func() {
+			// If moving the device to the host namespace fails, set its name back to ifName so that this
+			// function can be retried. Also bring the device back up, unless it was already down before.
+			if err != nil {
+				_ = netlink.LinkSetName(dev, ifName)
+				if origDev.Attrs().Flags&net.FlagUp == net.FlagUp { // dev may be the refreshed (down) link by now
+					_ = netlink.LinkSetUp(dev)
+				}
+			}
+		}()
+
+		newLink, err := setTempName(dev)
+		if err != nil {
+			return fmt.Errorf("failed to rename device %q to temporary name: %v", ifName, err)
+		}
+		dev = newLink
+		tempName = dev.Attrs().Name
+
+		if err = netlink.LinkSetNsFd(dev, int(defaultNs.Fd())); err != nil {
+			return fmt.Errorf("failed to move %q to host netns: %v", tempName, err)
+		}
+		return nil
+	})
+
+	if err != nil {
+		return err
+	}
+
+	// Rename the device to its original name from the host namespace
+	tempDev, err := netlink.LinkByName(tempName)
+	if err != nil {
+		return fmt.Errorf("failed to find %q in host namespace: %v", tempName, err)
+	}
+
+	if err = netlink.LinkSetName(tempDev, tempDev.Attrs().Alias); err != nil {
+		// move device back to container ns so it may be retried
+		defer func() {
+			_ = netlink.LinkSetNsFd(tempDev, int(containerNs.Fd()))
+			_ = containerNs.Do(func(_ ns.NetNS) error {
+				lnk, err := netlink.LinkByName(tempName)
+				if err != nil {
+					return err
+				}
+				_ = netlink.LinkSetName(lnk, ifName)
+				if origDev.Attrs().Flags&net.FlagUp == net.FlagUp {
+					_ = netlink.LinkSetUp(lnk)
+				}
+				return nil
+			})
+		}()
+		return fmt.Errorf("failed to restore %q to original name %q: %v", tempName, tempDev.Attrs().Alias, err)
+	}
+
+	return nil
+}
diff --git a/plugins/network-device-injector/network-device-injector.go b/plugins/network-device-injector/network-device-injector.go
new file mode 100644
index 00000000..ff093c44
--- /dev/null
+++ b/plugins/network-device-injector/network-device-injector.go
@@ -0,0 +1,306 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package main
+
+import (
+	"context"
+	"errors"
+	"flag"
+	"fmt"
+	"net"
+	"os"
+	"runtime"
+	"strings"
+
+	"github.com/containernetworking/plugins/pkg/ns"
+	"github.com/sirupsen/logrus"
+	"github.com/vishvananda/netlink"
+	"sigs.k8s.io/yaml"
+
+	"github.com/containerd/nri/pkg/api"
+	"github.com/containerd/nri/pkg/stub"
+)
+
+const (
+	// Prefix of the key used for network device annotations.
+	netdeviceKey = "netdevices.nri.containerd.io"
+)
+
+var (
+	log     *logrus.Logger
+	verbose bool
+)
+
+// netdevice is one network interface entry decoded from the pod annotation.
+// Field names follow https://man7.org/linux/man-pages/man7/netdevice.7.html
+type netdevice struct {
+	Name    string `json:"name"`     // name in the runtime namespace
+	NewName string `json:"new_name"` // name inside the pod namespace
+	Address string `json:"address"`  // optional IP address, checked with net.ParseIP
+	Prefix  int    `json:"prefix"`   // prefix length; 0 is defaulted to the host prefix
+	MTU     int    `json:"mtu"`      // MTU value from the annotation
+}
+
+// inject moves the host interface n.Name into the network namespace at nsPath, renamed to n.NewName.
+// NOTE(review): Address, Prefix and MTU are parsed but never applied to the link here - confirm intent.
+func (n *netdevice) inject(nsPath string) error {
+	// Lock the OS Thread so we don't accidentally switch namespaces
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	containerNs, err := ns.GetNS(nsPath)
+	if err != nil {
+		return err
+	}
+	defer containerNs.Close()
+
+	hostDev, err := netlink.LinkByName(n.Name)
+	if err != nil {
+		return err
+	}
+
+	if _, err = moveLinkIn(hostDev, containerNs, n.NewName); err != nil {
+		return fmt.Errorf("failed to move link %q: %w", n.Name, err)
+	}
+	return nil
+}
+
+// release removes the network device n.NewName from the namespace at nsPath and recovers its name.
+// Leaves the interface in down state to avoid issues with the root network.
+func (n *netdevice) release(nsPath string) error {
+	// Lock the OS Thread so we don't accidentally switch namespaces
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	containerNs, err := ns.GetNS(nsPath)
+	if err != nil {
+		return err
+	}
+	defer containerNs.Close()
+
+	return moveLinkOut(containerNs, n.NewName)
+}
+
+// plugin is the injector plugin; it holds the NRI stub it is registered with.
+type plugin struct {
+	stub stub.Stub
+}
+
+// podNetworkNamespace returns the path of the pod's "network" namespace entry,
+// or "" when the pod has none (pods running in the host network namespace).
+func podNetworkNamespace(pod *api.PodSandbox) string {
+	for _, namespace := range pod.Linux.GetNamespaces() {
+		if namespace.Type == "network" {
+			return namespace.Path
+		}
+	}
+	return ""
+}
+
+// RunPodSandbox injects the network devices requested by the pod annotations
+// into the pod network namespace. It returns an error for host-network pods
+// and when a device cannot be injected, instead of reporting success.
+func (p *plugin) RunPodSandbox(_ context.Context, pod *api.PodSandbox) error {
+	log.WithField("namespace", pod.GetNamespace()).WithField("name", pod.GetName()).Debug("Started pod...")
+	if verbose {
+		dump("RunPodSandbox", "pod", pod)
+	}
+
+	// inject associated netdevices (based on received pod annotations) into the pod
+	// network namespace that will be attached to the pod's containers
+	netdevices, err := parseNetdevices(pod.Annotations)
+	if err != nil {
+		return err
+	}
+
+	if len(netdevices) == 0 {
+		return nil
+	}
+
+	// Pods running on the host network namespace have this value empty
+	ns := podNetworkNamespace(pod)
+	if ns == "" {
+		log.WithField("namespace", pod.GetNamespace()).WithField("name", pod.GetName()).Info("Pod using host namespace, skipping ...")
+		return fmt.Errorf("trying to inject network device on host network Pod")
+	}
+
+	// attach the network devices to the pod namespace
+	for _, n := range netdevices {
+		if err = n.inject(ns); err != nil {
+			return fmt.Errorf("injecting network device %q: %w", n.Name, err)
+		}
+	}
+	return nil
+}
+
+// StopPodSandbox releases the network devices listed in the pod annotations
+// from the pod network namespace. Pods without a network namespace are skipped.
+func (p *plugin) StopPodSandbox(_ context.Context, pod *api.PodSandbox) error {
+	log.WithField("namespace", pod.GetNamespace()).WithField("name", pod.GetName()).Debug("Stopped pod...")
+	if verbose {
+		dump("StopPodSandbox", "pod", pod)
+	}
+	// release associated devices of the netdevice to the Pod
+	netdevices, err := parseNetdevices(pod.Annotations)
+	if err != nil {
+		return err
+	}
+
+	if len(netdevices) == 0 {
+		return nil
+	}
+
+	// TODO check host network namespace
+	ns := podNetworkNamespace(pod)
+	if ns == "" {
+		return nil
+	}
+
+	// release the network devices from the pod namespace; keep going after a
+	// failure so one bad device does not strand the remaining ones
+	var errs []error
+	for _, n := range netdevices {
+		if rerr := n.release(ns); rerr != nil {
+			errs = append(errs, fmt.Errorf("releasing network device %q: %w", n.NewName, rerr))
+		}
+	}
+	return errors.Join(errs...)
+}
+
+// parseNetdevices decodes the network device list from the pod annotations.
+// The netdeviceKey+"/pod" key wins over the bare netdeviceKey. Entries without
+// new_name keep their host name; an address without prefix gets /32 (IPv4) or
+// /128 (IPv6). It returns nil, nil when neither annotation key is present.
+func parseNetdevices(annotations map[string]string) ([]netdevice, error) {
+	var (
+		key        string
+		annotation []byte
+		netdevices []netdevice
+	)
+
+	// look up effective device annotation and unmarshal devices
+	for _, key = range []string{
+		netdeviceKey + "/pod",
+		netdeviceKey,
+	} {
+		if value, ok := annotations[key]; ok {
+			annotation = []byte(value)
+			break
+		}
+	}
+
+	if annotation == nil {
+		return nil, nil
+	}
+
+	if err := yaml.Unmarshal(annotation, &netdevices); err != nil {
+		return nil, fmt.Errorf("invalid device annotation %q: %w", key, err)
+	}
+
+	// validate and default
+	for i := range netdevices {
+		n := &netdevices[i] // ranging by value would apply the defaults to a copy
+		if n.NewName == "" {
+			n.NewName = n.Name
+		}
+		if n.Address == "" {
+			continue
+		}
+		ip := net.ParseIP(n.Address)
+		if ip == nil {
+			return nil, fmt.Errorf("error parsing address %s", n.Address)
+		}
+		if n.Prefix == 0 {
+			n.Prefix = 128
+			if ip.To4() != nil {
+				n.Prefix = 32
+			}
+		}
+	}
+	return netdevices, nil
+}
+
+// Dump one or more objects, with an optional global prefix and per-object tags.
+func dump(args ...interface{}) {
+	var (
+		prefix string
+		idx    int
+	)
+
+	if len(args)&0x1 == 1 { // odd count: the first argument is the global prefix
+		prefix = args[0].(string)
+		idx++
+	}
+
+	for ; idx < len(args)-1; idx += 2 {
+		tag, obj := args[idx], args[idx+1]
+		msg, err := yaml.Marshal(obj)
+		if err != nil {
+			log.Infof("%s: %s: failed to dump object: %v", prefix, tag, err)
+			continue
+		}
+
+		if prefix != "" {
+			log.Infof("%s: %s:", prefix, tag)
+			for _, line := range strings.Split(strings.TrimSpace(string(msg)), "\n") {
+				log.Infof("%s: %s", prefix, line)
+			}
+		} else {
+			log.Infof("%s:", tag)
+			for _, line := range strings.Split(strings.TrimSpace(string(msg)), "\n") {
+				log.Infof(" %s", line)
+			}
+		}
+	}
+}
+
+// main parses the -name, -idx and -verbose flags, creates the NRI stub and runs the plugin until the stub exits.
+func main() {
+	var (
+		pluginName string
+		pluginIdx  string
+		opts       []stub.Option
+		err        error
+	)
+
+	log = logrus.StandardLogger()
+	log.SetFormatter(&logrus.TextFormatter{
+		PadLevelText: true,
+	})
+
+	flag.StringVar(&pluginName, "name", "", "plugin name to register to NRI")
+	flag.StringVar(&pluginIdx, "idx", "", "plugin index to register to NRI")
+	flag.BoolVar(&verbose, "verbose", false, "enable (more) verbose logging")
+	flag.Parse()
+
+	if pluginName != "" {
+		opts = append(opts, stub.WithPluginName(pluginName))
+	}
+	if pluginIdx != "" {
+		opts = append(opts, stub.WithPluginIdx(pluginIdx))
+	}
+
+	p := &plugin{}
+	if p.stub, err = stub.New(p, opts...); err != nil {
+		log.Fatalf("failed to create plugin stub: %v", err)
+	}
+
+	if err = p.stub.Run(context.Background()); err != nil {
+		log.Errorf("plugin exited with error %v", err)
+		os.Exit(1)
+	}
+}
diff --git a/plugins/network-device-injector/sample-network-device-inject.yaml b/plugins/network-device-injector/sample-network-device-inject.yaml
new file mode 100644
index 00000000..9083e7ce
--- /dev/null
+++ b/plugins/network-device-injector/sample-network-device-inject.yaml
@@ -0,0 +1,50 @@
+# Create a dummy interface on the node where the pod will run
+#   ip link add dummy0 type dummy
+# Once this pod is running, you can verify the results by running
+#   kubectl exec -c c0 bbdev0 -- ip addr show dev eth33
+#   kubectl exec -c c1 bbdev0 -- ip addr show dev eth33
+#
+apiVersion: v1
+kind: Pod
+metadata:
+  name: bbdev0
+  labels:
+    app: bbdev0
+  annotations:
+    netdevices.nri.containerd.io: |+
+      - name: dummy0
+        new_name: eth33
+        address: 192.168.2.2
+        prefix: 24
+        mtu: 1500
+spec:
+  containers:
+  - name: c0
+    image: busybox
+    imagePullPolicy: IfNotPresent
+    command:
+      - sh
+      - -c
+      - echo bbdev0c0 $(sleep inf)
+    resources:
+      requests:
+        cpu: 500m
+        memory: '100M'
+      limits:
+        cpu: 500m
+        memory: '100M'
+  - name: c1
+    image: busybox
+    imagePullPolicy: IfNotPresent
+    command:
+      - sh
+      - -c
+      - echo bbdev0c1 $(sleep inf)
+    resources:
+      requests:
+        cpu: 1
+        memory: '100M'
+      limits:
+        cpu: 1
+        memory: '100M'
+  terminationGracePeriodSeconds: 1