From 953130af8df8a63e748fe0cdbb21647eee22ffbd Mon Sep 17 00:00:00 2001 From: Evyatar Date: Sun, 23 Jun 2024 18:33:46 +0300 Subject: [PATCH] Small refactor to pod credentials reconciler to also support auto restart for pods --- .../controllers/metadata/annotations.go | 3 + .../db_credentials_pod_reconciler.go | 216 ++++++++++++++++-- src/operator/go.mod | 2 +- src/operator/go.sum | 6 +- 4 files changed, 205 insertions(+), 22 deletions(-) diff --git a/src/operator/controllers/metadata/annotations.go b/src/operator/controllers/metadata/annotations.go index bd62413..8173622 100644 --- a/src/operator/controllers/metadata/annotations.go +++ b/src/operator/controllers/metadata/annotations.go @@ -6,6 +6,9 @@ const ( // UserAndPasswordSecretNameAnnotation is the name of the secret in which the user and password are stored UserAndPasswordSecretNameAnnotation = "credentials-operator.otterize.com/user-password-secret-name" + // RestartOnSecretRotation signals that the pod's owning workload should be restarted whenever its user-password secret is created or rotated + RestartOnSecretRotation = "credentials-operator.otterize.com/restart-on-secret-rotation" + // TLSSecretNameAnnotation is the name of the K8s secret in which the certificate data is stored TLSSecretNameAnnotation = "credentials-operator.otterize.com/tls-secret-name" TLSSecretNameAnnotationDeprecated = "spire-integration.otterize.com/tls-secret-name" diff --git a/src/operator/controllers/poduserpassword/db_credentials_pod_reconciler.go b/src/operator/controllers/poduserpassword/db_credentials_pod_reconciler.go index bde0477..791b1c9 100644 --- a/src/operator/controllers/poduserpassword/db_credentials_pod_reconciler.go +++ b/src/operator/controllers/poduserpassword/db_credentials_pod_reconciler.go @@ -14,6 +14,7 @@ import ( "github.com/samber/lo" "github.com/sirupsen/logrus" "github.com/spf13/viper" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -23,16 +24,19 @@ import ( ctrl "sigs.k8s.io/controller-runtime" 
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/reconcile" "strings" "time" ) const ( - ReasonEnsuredPodUserAndPassword = "EnsuredPodUserAndPassword" - ReasonGeneratePodDatabaseUserFailed = "GeneratePodDatabaseUserFailed" - ReasonEnsuringPodUserAndPasswordFailed = "EnsuringPodUserAndPasswordFailed" - ReasonEnsuringDatabasePasswordFailed = "EnsuringDatabasePasswordFailed" - ReasonRotatingSecretFailed = "RotatingSecretFailed" + ReasonEnsuredPodUserAndPassword = "EnsuredPodUserAndPassword" + ReasonGeneratePodDatabaseUserFailed = "GeneratePodDatabaseUserFailed" + ReasonEnsuringPodUserAndPasswordFailed = "EnsuringPodUserAndPasswordFailed" + ReasonEnsuringDatabasePasswordFailed = "EnsuringDatabasePasswordFailed" + ReasonRotatingSecretFailed = "RotatingSecretFailed" + ReasonRestartingPodAfterSecretRotationFailed = "RestartingPodAfterSecretRotationFailed" ) const ( @@ -58,6 +62,7 @@ func NewReconciler(client client.Client, scheme *runtime.Scheme, eventRecorder r func (e *Reconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). WithOptions(controller.Options{RecoverPanic: lo.ToPtr(true)}). + Watches(&v1.Secret{}, handler.EnqueueRequestsFromMapFunc(e.watchSecretDeletion)). For(&v1.Pod{}). 
Complete(e) } @@ -66,6 +71,11 @@ func (e *Reconciler) shouldHandleCredentialsForPod(pod v1.Pod) bool { return pod.Annotations != nil && hasUserAndPasswordSecretAnnotation(pod) } +func hasRestartAnnotation(pod v1.Pod) bool { + _, ok := pod.Annotations[metadata.RestartOnSecretRotation] + return ok +} + func hasUserAndPasswordSecretAnnotation(pod v1.Pod) bool { _, ok := pod.Annotations[metadata.UserAndPasswordSecretNameAnnotation] return ok @@ -109,21 +119,45 @@ func (e *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu return ctrl.Result{}, errors.Wrap(err) } + secretName := pod.Annotations[metadata.UserAndPasswordSecretNameAnnotation] logrus.Debug("Ensuring user-password credentials secrets for pod") - password, err := e.ensurePodUserAndPasswordSecret(ctx, &pod, pod.Annotations[metadata.UserAndPasswordSecretNameAnnotation], username) + result, createdSecret, err := e.ensurePodUserAndPasswordSecret(ctx, &pod, secretName, username) if err != nil { e.recorder.Eventf(&pod, v1.EventTypeWarning, ReasonEnsuringPodUserAndPasswordFailed, "Failed to ensure user-password credentials secret: %s", err.Error()) return ctrl.Result{}, errors.Wrap(err) } - if hasDatabaseAccessAnnotation(pod) { - logrus.Debug("Validating password in all databases") - err = e.ensurePasswordInDatabases(ctx, pod, username, password) - if err != nil { + if result.Requeue { + return result, nil + } + + if createdSecret { + secret := v1.Secret{} + if err := e.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: secretName}, &secret); err != nil { return ctrl.Result{}, errors.Wrap(err) } + password := string(secret.Data["password"]) + + if hasDatabaseAccessAnnotation(pod) { + logrus.Debug("Validating password in all databases") + err = e.ensurePasswordInDatabases(ctx, pod, username, password) + if err != nil { + return ctrl.Result{}, errors.Wrap(err) + } + } + + if hasRestartAnnotation(pod) { + logrus.Info("Triggering pod restart for newly created secret") + err := 
e.TriggerPodRestart(ctx, &pod) + if err != nil { + e.recorder.Eventf(&pod, v1.EventTypeWarning, + ReasonRestartingPodAfterSecretRotationFailed, "Failed restarting pod after secret creation: %s", err.Error()) + } + } + + e.recorder.Event(&pod, v1.EventTypeNormal, ReasonEnsuredPodUserAndPassword, "Ensured user-password credentials in specified secret") } - e.recorder.Event(&pod, v1.EventTypeNormal, ReasonEnsuredPodUserAndPassword, "Ensured user-password credentials in specified secret") + return ctrl.Result{}, nil } @@ -142,30 +176,38 @@ func (e *Reconciler) generateServiceDatabaseUsername(ctx context.Context, pod *v return clusterutils.KubernetesToPostgresName(username), nil } -func (e *Reconciler) ensurePodUserAndPasswordSecret(ctx context.Context, pod *v1.Pod, secretName string, username string) (string, error) { +func (e *Reconciler) ensurePodUserAndPasswordSecret(ctx context.Context, pod *v1.Pod, secretName string, username string) (ctrl.Result, bool, error) { log := logrus.WithFields(logrus.Fields{"pod": pod.Name, "namespace": pod.Namespace}) secret := v1.Secret{} err := e.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: secretName}, &secret) + if apierrors.IsNotFound(err) { log.Debug("Creating user-password credentials secret for pod") password, err := databaseconfigurator.GenerateRandomPassword() if err != nil { - return "", errors.Wrap(err) + return ctrl.Result{}, false, errors.Wrap(err) } secret := buildUserAndPasswordCredentialsSecret(secretName, pod.Namespace, username, password) log.WithField("secret", secretName).Debug("Creating new secret with user-password credentials") if err := e.client.Create(ctx, secret); err != nil { - return "", errors.Wrap(err) + return ctrl.Result{}, false, errors.Wrap(err) } - return password, nil + return ctrl.Result{}, true, nil } if err != nil { - return "", errors.Wrap(err) + return ctrl.Result{}, false, errors.Wrap(err) + } + + // If secret exists but is being deleted we need to requeue until it is 
deleted + if !secret.DeletionTimestamp.IsZero() { + logrus.Info("Secret is being deleted, retriggering reconcile") + return ctrl.Result{Requeue: true}, false, nil } + log.Debug("Secret exists, nothing to do") - return string(secret.Data["password"]), nil + return ctrl.Result{}, false, nil } func (e *Reconciler) ensurePasswordInDatabases(ctx context.Context, pod v1.Pod, username string, password string) error { @@ -233,8 +275,11 @@ func (e *Reconciler) RotateSecretsAndAlterPasswords(ctx context.Context) error { logrus.Infof("Rotated secret: %s.%s", secret.Name, secret.Namespace) } - err := e.runAlterPasswordForSecrets(ctx, rotatedSecrets) - if err != nil { + if err := e.runAlterPasswordForSecrets(ctx, rotatedSecrets); err != nil { + return errors.Wrap(err) + } + + if err := e.handlePodRestartsForRotatedSecrets(ctx, rotatedSecrets); err != nil { return errors.Wrap(err) } @@ -350,6 +395,7 @@ func (e *Reconciler) ensurePasswordInDatabaseInstance( return true, nil } + func closeAllConnections(ctx context.Context, allConfigurators []databaseconfigurator.DatabaseConfigurator) { for _, dbConfigurator := range allConfigurators { dbConfigurator.CloseConnection(ctx) @@ -366,6 +412,7 @@ func (e *Reconciler) GetAllDBConfigurators(ctx context.Context, mysqlServerConfi } configurators = append(configurators, dbconfigurator) } + for _, pgServerConfig := range pgServerConfigs { dbconfigurator, err := postgres.NewPostgresConfigurator(ctx, pgServerConfig.Spec) if err != nil { @@ -377,6 +424,137 @@ func (e *Reconciler) GetAllDBConfigurators(ctx context.Context, mysqlServerConfi return configurators } +func (e *Reconciler) TriggerPodRestart(ctx context.Context, pod *v1.Pod) error { + owner, err := e.serviceIdResolver.GetOwnerObject(ctx, pod) + if err != nil { + return errors.Wrap(err) + } + kind := owner.GetObjectKind().GroupVersionKind().Kind + logrus.Infof("Trying to trigger restart for workload '%s' of kind %s", owner.GetName(), kind) + switch kind { + case "Deployment": + 
deployment := appsv1.Deployment{} + if err := e.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: owner.GetName()}, &deployment); err != nil { + return errors.Wrap(err) + } + deployment.Spec.Template.Annotations[metadata.TLSRestartTimeAfterRenewal] = time.Now().Format(time.RFC3339) + if err := e.client.Update(ctx, &deployment); err != nil { + return errors.Wrap(err) + } + case "ReplicaSet": + replicaSet := appsv1.ReplicaSet{} + if err := e.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: owner.GetName()}, &replicaSet); err != nil { + return errors.Wrap(err) + } + replicaSet.Spec.Template.Annotations[metadata.TLSRestartTimeAfterRenewal] = time.Now().Format(time.RFC3339) + if err := e.client.Update(ctx, &replicaSet); err != nil { + return errors.Wrap(err) + } + case "StatefulSet": + statefulSet := appsv1.StatefulSet{} + if err := e.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: owner.GetName()}, &statefulSet); err != nil { + return errors.Wrap(err) + } + statefulSet.Spec.Template.Annotations[metadata.TLSRestartTimeAfterRenewal] = time.Now().Format(time.RFC3339) + if err := e.client.Update(ctx, &statefulSet); err != nil { + return errors.Wrap(err) + } + case "DaemonSet": + daemonSet := appsv1.DaemonSet{} + if err := e.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: owner.GetName()}, &daemonSet); err != nil { + return errors.Wrap(err) + } + daemonSet.Spec.Template.Annotations[metadata.TLSRestartTimeAfterRenewal] = time.Now().Format(time.RFC3339) + if err := e.client.Update(ctx, &daemonSet); err != nil { + return errors.Wrap(err) + } + default: + // Rougher way - trigger delete on all pods in the namespace with the same labels - will restart just the owner's pods + err := e.client.DeleteAllOf(ctx, &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Namespace: pod.Namespace}, + }, client.MatchingLabels(pod.Labels)) + if err != nil { + return errors.Wrap(err) + } + } + + return nil +} + +func (e 
*Reconciler) handlePodRestartsForRotatedSecrets(ctx context.Context, rotatedSecrets []v1.Secret) error { + for _, secret := range rotatedSecrets { + podList := &v1.PodList{} + if err := e.client.List(ctx, podList, &client.ListOptions{Namespace: secret.Namespace}); err != nil { + return errors.Wrap(err) + } + for _, pod := range podList.Items { + secretName, ok := pod.Annotations[metadata.UserAndPasswordSecretNameAnnotation] + if ok && secretName == secret.Name { + if hasRestartAnnotation(pod) { + logrus.Info("Triggering pod restart after secret rotation") + err := e.TriggerPodRestart(ctx, &pod) + if err != nil { + e.recorder.Eventf(&pod, v1.EventTypeWarning, + ReasonRestartingPodAfterSecretRotationFailed, "Failed restarting pod after secret rotation: %s", err.Error()) + } + // A single restart trigger should handle all pod replicas, we can break + // If we failed, we continue to try and restart pods for other rotated secrets + break + } + } + } + } + + return nil +} + +func (e *Reconciler) watchSecretDeletion(ctx context.Context, object client.Object) []reconcile.Request { + namespace := v1.Namespace{} + err := e.client.Get(ctx, types.NamespacedName{Name: object.GetNamespace()}, &namespace) + if err != nil { + logrus.WithError(err).Error("Failed to get namespace") + return nil + } + if !namespace.DeletionTimestamp.IsZero() { + // Skip deleting namespaces + return nil + } + + secret := v1.Secret{} + err = e.client.Get(ctx, types.NamespacedName{Namespace: object.GetNamespace(), Name: object.GetName()}, &secret) + if err != nil && !apierrors.IsNotFound(err) { + logrus.WithError(err).Error("Failed to get secret") + return nil + } + + // Only call reconcile if the secret is being deleted or is not found anymore + if !secret.DeletionTimestamp.IsZero() || apierrors.IsNotFound(err) { + podList := v1.PodList{} + if err := e.client.List(ctx, &podList, &client.ListOptions{Namespace: object.GetNamespace()}); err != nil { + logrus.WithError(err).Error("Failed to list pods") + 
return nil + } + + if len(podList.Items) == 0 { + return nil + } + + result := lo.Filter(podList.Items, func(pod v1.Pod, _ int) bool { + secretName, ok := pod.Annotations[metadata.UserAndPasswordSecretNameAnnotation] + return ok && secretName == object.GetName() + }) + + if len(result) > 0 { + // Even if more than 1 pods match the criteria, we just need to enqueue reconciliation for one + pod := result[0] + logrus.Infof("Enqueueing pod reconciliation: %s for deleted secret: %s", pod.Name, object.GetName()) + return []reconcile.Request{{NamespacedName: types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}}} + } + } + + return nil +} + func shouldRotateSecret(secret v1.Secret) bool { if secret.Annotations == nil { return true diff --git a/src/operator/go.mod b/src/operator/go.mod index c08cf6f..f7b6774 100644 --- a/src/operator/go.mod +++ b/src/operator/go.mod @@ -117,7 +117,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/otterize/intents-operator/src v0.0.0-20240521104220-ba00b7c59637 // indirect + github.com/otterize/intents-operator/src v0.0.0-20240623092600-0eb9a9628a9f // indirect github.com/otterize/nilable v0.0.0-20240410132629-f242bb6f056f // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.0.8 // indirect diff --git a/src/operator/go.sum b/src/operator/go.sum index dc2bec3..c2c13c7 100644 --- a/src/operator/go.sum +++ b/src/operator/go.sum @@ -432,10 +432,12 @@ github.com/onsi/ginkgo/v2 v2.14.0 h1:vSmGj2Z5YPb9JwCWT6z6ihcUvDhuXLc3sJiqd3jMKAY github.com/onsi/ginkgo/v2 v2.14.0/go.mod h1:JkUdW7JkN0V6rFvsHcJ478egV3XH9NxpD27Hal/PhZw= github.com/onsi/gomega v1.30.0 h1:hvMK7xYz4D3HapigLTeGdId/NcfQx1VHMJc60ew99+8= github.com/onsi/gomega v1.30.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= 
-github.com/otterize/intents-operator/src v0.0.0-20240521053840-36662b8fd8fa h1:R+wqqZ5Gd/MoD3Jy9I2e6ZxBylh63D+EAJx87x1wzIE= -github.com/otterize/intents-operator/src v0.0.0-20240521053840-36662b8fd8fa/go.mod h1:7vDL6/NAo7AobUGqDGU/277xGyb0KTRQoqRjoouhh44= github.com/otterize/intents-operator/src v0.0.0-20240521104220-ba00b7c59637 h1:fhtXDgHYymOrHaAdaBg7kTs8D53u35nmGXYLiDhVjtU= github.com/otterize/intents-operator/src v0.0.0-20240521104220-ba00b7c59637/go.mod h1:7vDL6/NAo7AobUGqDGU/277xGyb0KTRQoqRjoouhh44= +github.com/otterize/intents-operator/src v0.0.0-20240623092600-0eb9a9628a9f h1:Gj2hrAncbdgXICnnxpICu4NbNd1Dc+GciNGlqClg13g= +github.com/otterize/intents-operator/src v0.0.0-20240623092600-0eb9a9628a9f/go.mod h1:7vDL6/NAo7AobUGqDGU/277xGyb0KTRQoqRjoouhh44= +github.com/otterize/intents-operator/src v0.1.15 h1:Cd0dMKLsi6iz1y3c0KKrq4dXABWVrq2Jo37aQAYcJQA= +github.com/otterize/intents-operator/src v0.1.15/go.mod h1:J3iXhY18AZzG19op3JbnduQyJtGu0OUaM44Kb/OxDuI= github.com/otterize/lox v0.0.0-20220525164329-9ca2bf91c3dd h1:7Sb95VrtAPb9m2ewtqLnX1oeKQy03dt7yr6F/hP7Htg= github.com/otterize/lox v0.0.0-20220525164329-9ca2bf91c3dd/go.mod h1:RXvgymN8MxiELFkmGHzJ23KJU2ObVsNsNSM80/HO8qQ= github.com/otterize/nilable v0.0.0-20240410132629-f242bb6f056f h1:gv92189CW53A+Y0UQ550zr6RfCBYqvYJ8oq6Jll1YqQ=