Skip to content

Commit

Permalink
feat(robot): forward InternalIPs by default on Robot nodes (#865)
Browse files Browse the repository at this point in the history
Whenever a user configures the `--node-ip` flag, we forward the
configured IPs. We validate if the IP already exists as an ExternalIP
and does not collide with the configured address family. This improves
the Robot support for HCCM, as users can configure private IPs for Robot
nodes.

Forwarding only happens during the initialization phase of HCCM and
should therefore not break existing clusters.

This feature can be disabled by setting the environment variable
`ROBOT_FORWARD_INTERNAL_IPS` to `false`.

---------
Co-authored-by: Alexander Block <ablock84@gmail.com>
Co-authored-by: Julian Tölle <julian.toelle@hetzner-cloud.de>
  • Loading branch information
lukasmetzner authored Mar 4, 2025
1 parent 427ea35 commit 7b97ce2
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 41 deletions.
1 change: 1 addition & 0 deletions docs/robot.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ The Node controller adds information about the server to the Node object. The va
- Addresses
- We add the Hostname and (depending on the configuration and availability) the IPv4 and IPv6 addresses of the server in `Node.status.addresses`.
- For the IPv6 address we use the first address in the Network -> For the network `2a01:f48:111:4221::` we add the address `2a01:f48:111:4221::1`.
- By default, we pass along InternalIPs configured via the kubelet flag `--node-ip`. This can be disabled by setting the environment variable `ROBOT_FORWARD_INTERNAL_IPS` to `false`. It is not allowed to configure the same IP for InternalIP and ExternalIP.
- Private IPs in a vSwitch are not supported.

### Node Lifecycle Controller
Expand Down
82 changes: 62 additions & 20 deletions hcloud/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ import (
"context"
"errors"
"fmt"
"net"

hrobotmodels "github.com/syself/hrobot-go/models"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog/v2"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/config"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
Expand All @@ -34,7 +36,8 @@ import (
)

const (
ProvidedBy = "instance.hetzner.cloud/provided-by"
ProvidedBy = "instance.hetzner.cloud/provided-by"
MisconfiguredInternalIP = "MisconfiguredInternalIP"
)

type instances struct {
Expand Down Expand Up @@ -106,7 +109,7 @@ func (i *instances) lookupServer(
return nil, nil
}

return robotServer{server, i.robotClient}, nil
return robotServer{server, i.robotClient, i.recorder}, nil
}

// If the node has no provider ID we try to find the server by name from
Expand Down Expand Up @@ -139,7 +142,7 @@ func (i *instances) lookupServer(
case cloudServer != nil:
return hcloudServer{cloudServer}, nil
case hrobotServer != nil:
return robotServer{hrobotServer, i.robotClient}, nil
return robotServer{hrobotServer, i.robotClient, i.recorder}, nil
default:
// Both nil
return nil, nil
Expand Down Expand Up @@ -196,7 +199,7 @@ func (i *instances) InstanceMetadata(ctx context.Context, node *corev1.Node) (*c
op, node.Name, errServerNotFound)
}

metadata, err := server.Metadata(i.networkID, i.cfg)
metadata, err := server.Metadata(i.networkID, node, i.cfg)
if err != nil {
return nil, fmt.Errorf("%s: %w", op, err)
}
Expand Down Expand Up @@ -254,13 +257,12 @@ func hcloudNodeAddresses(

func robotNodeAddresses(
server *hrobotmodels.Server,
node *corev1.Node,
cfg config.HCCMConfiguration,
recorder record.EventRecorder,
) []corev1.NodeAddress {
var addresses []corev1.NodeAddress
addresses = append(
addresses,
corev1.NodeAddress{Type: corev1.NodeHostName, Address: server.Name},
)
addresses = append(addresses, corev1.NodeAddress{Type: corev1.NodeHostName, Address: server.Name})

dualStack := cfg.Instance.AddressFamily == config.AddressFamilyDualStack
ipv4 := cfg.Instance.AddressFamily == config.AddressFamilyIPv4 || dualStack
Expand All @@ -269,18 +271,56 @@ func robotNodeAddresses(
if ipv6 {
// For a given IPv6 network of 2a01:f48:111:4221::, the instance address is 2a01:f48:111:4221::1
hostAddress := server.ServerIPv6Net + "1"

addresses = append(
addresses,
corev1.NodeAddress{Type: corev1.NodeExternalIP, Address: hostAddress},
)
addresses = append(addresses, corev1.NodeAddress{Type: corev1.NodeExternalIP, Address: hostAddress})
}

if ipv4 {
addresses = append(
addresses,
corev1.NodeAddress{Type: corev1.NodeExternalIP, Address: server.ServerIP},
)
addresses = append(addresses, corev1.NodeAddress{Type: corev1.NodeExternalIP, Address: server.ServerIP})
}

if cfg.Robot.ForwardInternalIPs {
OUTER:
for _, currentAddress := range node.Status.Addresses {
if currentAddress.Type != corev1.NodeInternalIP {
continue
}

ip := net.ParseIP(currentAddress.Address)
isIPv4 := ip.To4() != nil

var warnMsg string
if isIPv4 && ipv6 && !dualStack {
warnMsg = fmt.Sprintf(
"Configured InternalIP is IPv4 even though IPv6 only is configured. As a result, %s is not added as an InternalIP",
currentAddress.Address,
)
} else if !isIPv4 && ipv4 && !dualStack {
warnMsg = fmt.Sprintf(
"Configured InternalIP is IPv6 even though IPv4 only is configured. As a result, %s is not added as an InternalIP",
currentAddress.Address,
)
}

if warnMsg != "" {
recorder.Event(node, corev1.EventTypeWarning, MisconfiguredInternalIP, warnMsg)
klog.Warning(warnMsg)
continue
}

for _, address := range addresses {
if currentAddress.Address == address.Address {
warnMsg := fmt.Sprintf(
"Configured InternalIP already exists as an ExternalIP. As a result, %s is not added as an InternalIP",
currentAddress.Address,
)
recorder.Event(node, corev1.EventTypeWarning, MisconfiguredInternalIP, warnMsg)
klog.Warning(warnMsg)
continue OUTER
}
}

addresses = append(addresses, currentAddress)
}
}

return addresses
Expand All @@ -290,6 +330,7 @@ type genericServer interface {
IsShutdown() (bool, error)
Metadata(
networkID int64,
node *corev1.Node,
cfg config.HCCMConfiguration,
) (*cloudprovider.InstanceMetadata, error)
}
Expand All @@ -302,7 +343,7 @@ func (s hcloudServer) IsShutdown() (bool, error) {
return s.Status == hcloud.ServerStatusOff, nil
}

func (s hcloudServer) Metadata(networkID int64, cfg config.HCCMConfiguration) (*cloudprovider.InstanceMetadata, error) {
func (s hcloudServer) Metadata(networkID int64, _ *corev1.Node, cfg config.HCCMConfiguration) (*cloudprovider.InstanceMetadata, error) {
return &cloudprovider.InstanceMetadata{
ProviderID: providerid.FromCloudServerID(s.ID),
InstanceType: s.ServerType.Name,
Expand All @@ -318,6 +359,7 @@ func (s hcloudServer) Metadata(networkID int64, cfg config.HCCMConfiguration) (*
type robotServer struct {
*hrobotmodels.Server
robotClient robot.Client
recorder record.EventRecorder
}

func (s robotServer) IsShutdown() (bool, error) {
Expand All @@ -331,11 +373,11 @@ func (s robotServer) IsShutdown() (bool, error) {
return resetStatus.OperatingStatus == "shut off", nil
}

func (s robotServer) Metadata(_ int64, cfg config.HCCMConfiguration) (*cloudprovider.InstanceMetadata, error) {
func (s robotServer) Metadata(_ int64, node *corev1.Node, cfg config.HCCMConfiguration) (*cloudprovider.InstanceMetadata, error) {
return &cloudprovider.InstanceMetadata{
ProviderID: providerid.FromRobotServerNumber(s.ServerNumber),
InstanceType: getInstanceTypeOfRobotServer(s.Server),
NodeAddresses: robotNodeAddresses(s.Server, cfg),
NodeAddresses: robotNodeAddresses(s.Server, node, cfg, s.recorder),
Zone: getZoneOfRobotServer(s.Server),
Region: getRegionOfRobotServer(s.Server),
AdditionalLabels: map[string]string{
Expand Down
77 changes: 71 additions & 6 deletions hcloud/instances_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -598,11 +598,12 @@ func TestNodeAddresses(t *testing.T) {

func TestNodeAddressesRobotServer(t *testing.T) {
tests := []struct {
name string
addressFamily config.AddressFamily
server *hrobotmodels.Server
privateNetwork int
expected []corev1.NodeAddress
name string
addressFamily config.AddressFamily
server *hrobotmodels.Server
nodeStatusNodeAddresses []corev1.NodeAddress
privateNetwork int
expected []corev1.NodeAddress
}{
{
name: "public ipv4",
Expand Down Expand Up @@ -644,6 +645,61 @@ func TestNodeAddressesRobotServer(t *testing.T) {
{Type: corev1.NodeExternalIP, Address: "203.0.113.7"},
},
},
{
name: "public ipv4 and internal ip",
addressFamily: config.AddressFamilyIPv4,
nodeStatusNodeAddresses: []corev1.NodeAddress{
{
Type: corev1.NodeInternalIP,
Address: "10.0.1.2",
},
},
server: &hrobotmodels.Server{
Name: "foobar",
ServerIP: "203.0.113.7",
},
expected: []corev1.NodeAddress{
{Type: corev1.NodeHostName, Address: "foobar"},
{Type: corev1.NodeExternalIP, Address: "203.0.113.7"},
{Type: corev1.NodeInternalIP, Address: "10.0.1.2"},
},
},
{
name: "configured InternalIP is also ExternalIP",
addressFamily: config.AddressFamilyIPv4,
nodeStatusNodeAddresses: []corev1.NodeAddress{
{
Type: corev1.NodeInternalIP,
Address: "203.0.113.7",
},
},
server: &hrobotmodels.Server{
Name: "foobar",
ServerIP: "203.0.113.7",
},
expected: []corev1.NodeAddress{
{Type: corev1.NodeHostName, Address: "foobar"},
{Type: corev1.NodeExternalIP, Address: "203.0.113.7"},
},
},
{
name: "configured InternalIP does not fit configured AddressFamily",
addressFamily: config.AddressFamilyIPv4,
nodeStatusNodeAddresses: []corev1.NodeAddress{
{
Type: corev1.NodeInternalIP,
Address: "2001:db8:1234::",
},
},
server: &hrobotmodels.Server{
Name: "foobar",
ServerIP: "203.0.113.7",
},
expected: []corev1.NodeAddress{
{Type: corev1.NodeHostName, Address: "foobar"},
{Type: corev1.NodeExternalIP, Address: "203.0.113.7"},
},
},
}

for _, test := range tests {
Expand All @@ -653,7 +709,16 @@ func TestNodeAddressesRobotServer(t *testing.T) {
cfg, err := config.Read()
assert.NoError(t, err)

addresses := robotNodeAddresses(test.server, cfg)
node := &corev1.Node{
Status: corev1.NodeStatus{
Addresses: []corev1.NodeAddress{},
},
}
if test.nodeStatusNodeAddresses != nil {
node.Status.Addresses = test.nodeStatusNodeAddresses
}

addresses := robotNodeAddresses(test.server, node, cfg, &MockEventRecorder{})

if !reflect.DeepEqual(addresses, test.expected) {
t.Fatalf("%s: expected addresses %+v but got %+v", test.name, test.expected, addresses)
Expand Down
21 changes: 21 additions & 0 deletions hcloud/instances_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,33 @@ import (

hrobotmodels "github.com/syself/hrobot-go/models"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/robot"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
)

type MockEventRecorder struct{}

func (er *MockEventRecorder) Event(_ runtime.Object, _, _, _ string) {
}

func (er *MockEventRecorder) Eventf(
_ runtime.Object,
_, _, _ string,
_ ...interface{},
) {
}

func (er *MockEventRecorder) AnnotatedEventf(
_ runtime.Object,
_ map[string]string,
_, _, _ string,
_ ...interface{},
) {
}

func getCloudServerByName(ctx context.Context, c *hcloud.Client, name string) (*hcloud.Server, error) {
const op = "hcloud/getCloudServerByName"
metrics.OperationCalled.WithLabelValues(op).Inc()
Expand Down
19 changes: 14 additions & 5 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ const (
hcloudNetwork = "HCLOUD_NETWORK"
hcloudDebug = "HCLOUD_DEBUG"

robotEnabled = "ROBOT_ENABLED"
robotUser = "ROBOT_USER"
robotPassword = "ROBOT_PASSWORD"
robotCacheTimeout = "ROBOT_CACHE_TIMEOUT"
robotRateLimitWaitTime = "ROBOT_RATE_LIMIT_WAIT_TIME"
robotEnabled = "ROBOT_ENABLED"
robotUser = "ROBOT_USER"
robotPassword = "ROBOT_PASSWORD"
robotCacheTimeout = "ROBOT_CACHE_TIMEOUT"
robotRateLimitWaitTime = "ROBOT_RATE_LIMIT_WAIT_TIME"
robotForwardInternalIPs = "ROBOT_FORWARD_INTERNAL_IPS"

hcloudInstancesAddressFamily = "HCLOUD_INSTANCES_ADDRESS_FAMILY"

Expand Down Expand Up @@ -53,6 +54,8 @@ type RobotConfiguration struct {
Password string
CacheTimeout time.Duration
RateLimitWaitTime time.Duration
// ForwardInternalIPs is enabled by default.
ForwardInternalIPs bool
}

type MetricsConfiguration struct {
Expand Down Expand Up @@ -143,6 +146,12 @@ func Read() (HCCMConfiguration, error) {
if err != nil {
errs = append(errs, err)
}
cfg.Robot.ForwardInternalIPs, err = getEnvBool(robotForwardInternalIPs, true)
if err != nil {
errs = append(errs, err)
}
// Robot needs to be enabled
cfg.Robot.ForwardInternalIPs = cfg.Robot.ForwardInternalIPs && cfg.Robot.Enabled

cfg.Metrics.Enabled, err = getEnvBool(hcloudMetricsEnabled, true)
if err != nil {
Expand Down
Loading

0 comments on commit 7b97ce2

Please sign in to comment.