From 89a26154405ffd020ac05c761fdc6ebf4c2d3775 Mon Sep 17 00:00:00 2001 From: Francesco Pantano Date: Tue, 19 Mar 2024 13:40:51 +0100 Subject: [PATCH] Fix Ceph image conversion With the current implementation, we require the human operator to build a PVC for Image Conversion purposes. The PVC, attached to an arbitrary storage class, can then be injected and mounted to the right path via ExtraMounts. However, this implementation is good only for RWX PVCs, because it realizes a common space for all the glanceAPIs and replicas where the conversion happens. If a RWO PVC is used, a replica deployed on a different node won't be able to mount the PVC, and worse, it will be stuck in `Pending` state due to the Distribution affinity policy implemented in the operator. Even with the NodeSelector usage, there will be cases where this is going to generate issues. This patch fixes the problem described above with the following: 1. It introduces a PVC, created by the StatefulSet (via templating) only for the "external" glanceAPI instances 2. It creates the imageConversion PVC only if Ceph is enabled 3. It automatically sets the Image conversion plugin feature in Glance 00-config.conf when Ceph is part of the enabled_backend string 4. Takes care about the StatefulSet update for fields that are forbidden The image conversion PVC is a RWO, stable storage that can be used by each replica (tested with replica:3) and we avoid any manual action required by a human operator. ImageConversion is enabled by default with Ceph and it's transparent to the end user. Fixes: https://issues.redhat.com/browse/OSPRH-5690 Signed-off-by: Francesco Pantano --- config/samples/backends/README.md | 26 ++++ .../image_conversion/image_conversion.yaml | 57 --------- .../image_conversion_pvc.yaml | 11 -- controllers/glanceapi_controller.go | 121 ++++++++++++++---- pkg/glance/const.go | 3 + pkg/glance/pvc.go | 13 +- pkg/glance/volumes.go | 11 ++ pkg/glanceapi/statefulset.go | 15 +++ templates/common/config/00-config.conf | 12 +- 9 files changed, 168 insertions(+), 101 deletions(-) delete mode 100644 config/samples/import_plugins/image_conversion/image_conversion.yaml delete mode 100644 config/samples/import_plugins/image_conversion/image_conversion_pvc.yaml diff --git a/config/samples/backends/README.md b/config/samples/backends/README.md index c770bd93..c756027a 100644 --- a/config/samples/backends/README.md +++ b/config/samples/backends/README.md @@ -53,6 +53,32 @@ $ make openstack_deploy If we already have a deployment working we can always use `oc kustomize ceph | oc apply -f -`. from this directory to make the changes. +**Note:** + +When Ceph is adopted as a backend, Glance `image-conversion` is enabled by default. +It's realized through a dedicated `PVC` (built by the `StatefulSet` via templates) +that is mounted to the `/var/lib/glance/os_glance_staging_store` path. +A `glance-conversion` PVC can be found with if the Glance **external** Pod is +inspected via the `oc describe pod .. ` command: + + +```bash +... + Mounts: + /etc/ceph from ceph (ro) + /etc/my.cnf from config-data (ro,path="my.cnf") + /usr/local/bin/container-scripts from scripts (ro) + /var/lib/config-data/default from config-data (ro) + /var/lib/glance from glance (rw) + /var/lib/glance/os_glance_staging_store from glance-conversion (rw) + /var/lib/kolla/config_files/config.json from config-data (ro,path="glance-api-config.json") + /var/log/glance from logs (rw) +... +``` +The PVC is only created for the external instance: this space is only used to +store staging data of the image that is going to be uploaded, and an internal +`glanceAPI` will never use it. + ## Ceph with Sparse Image Upload example Assuming you are using `install_yamls` and you already have `crc` running you diff --git a/config/samples/import_plugins/image_conversion/image_conversion.yaml b/config/samples/import_plugins/image_conversion/image_conversion.yaml deleted file mode 100644 index 3c2277a3..00000000 --- a/config/samples/import_plugins/image_conversion/image_conversion.yaml +++ /dev/null @@ -1,57 +0,0 @@ -# Sample using Ceph as a glance backend with image conversion plugin -# Requires a running Ceph cluster and its `/etc/ceph` files in secret `ceph-conf-files` -# This can be achieved with the `ceph` target of `install_yamls` -apiVersion: glance.openstack.org/v1beta1 -kind: Glance -metadata: - name: glance -spec: - serviceUser: glance - containerImage: quay.io/podified-antelope-centos9/openstack-glance-api:current-podified - customServiceConfig: | - [DEFAULT] - enabled_backends = default_backend:rbd - enabled_import_methods=[web-download,glance-direct] - [glance_store] - default_backend = default_backend - [default_backend] - rbd_store_ceph_conf = /etc/ceph/ceph.conf - store_description = "RBD backend" - rbd_store_pool = images - rbd_store_user = openstack - [image_import_opts] - image_import_plugins = ['image_conversion'] - [image_conversion] - output_format = raw - databaseInstance: openstack - databaseAccount: glance - glanceAPI: - preserveJobs: false - replicas: 1 - secret: osp-secret - storageClass: "" - storageRequest: 1G - extraMounts: - - name: v1 - region: r1 - extraVol: - - propagation: - - Glance - extraVolType: Ceph - volumes: - - name: ceph - projected: - sources: - - secret: - name: ceph-conf-files - - name: image-import-staging-workspace - persistentVolumeClaim: - claimName: image-import-staging-workspace - readOnly: false - mounts: - - name: ceph - mountPath: "/etc/ceph" - readOnly: true - - name: image-import-staging-workspace - mountPath: /var/lib/glance/os_glance_staging_store/ - readOnly: false diff --git a/config/samples/import_plugins/image_conversion/image_conversion_pvc.yaml b/config/samples/import_plugins/image_conversion/image_conversion_pvc.yaml deleted file mode 100644 index eae37771..00000000 --- a/config/samples/import_plugins/image_conversion/image_conversion_pvc.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: image-import-staging-workspace -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 5Gi - storageClassName: local-storage diff --git a/controllers/glanceapi_controller.go b/controllers/glanceapi_controller.go index 81889bd8..0b977b97 100644 --- a/controllers/glanceapi_controller.go +++ b/controllers/glanceapi_controller.go @@ -540,6 +540,7 @@ func (r *GlanceAPIReconciler) reconcileNormal(ctx context.Context, instance *gla configVars := make(map[string]env.Setter) privileged := false + imageConv := false // // check for required OpenStack secret holding passwords for service/admin user and add hash to the vars map @@ -566,7 +567,45 @@ func (r *GlanceAPIReconciler) reconcileNormal(ctx context.Context, instance *gla instance.Status.Conditions.MarkTrue(condition.InputReadyCondition, condition.InputReadyMessage) // run check OpenStack secret - end - err = r.generateServiceConfig(ctx, helper, instance, &configVars) + // Get Enabled backends from customServiceConfig and run pre backend conditions + availableBackends := glancev1.GetEnabledBackends(instance.Spec.CustomServiceConfig) + _, hashChanged, err := r.createHashOfBackendConfig(instance, availableBackends) + if err != nil { + return ctrl.Result{}, err + } + // Update the current StateFulSet (by recreating it) only when a backend is + // added or removed from an already existing API + if hashChanged { + if err = r.glanceAPIRefresh(ctx, helper, instance); err != nil { + return ctrl.Result{}, err + } + } + // iterate over availableBackends for backend specific cases + for i := 0; i < len(availableBackends); i++ { + backendToken := strings.SplitN(availableBackends[i], ":", 2) + switch { + case backendToken[1] == "cinder": + cinder := &cinderv1.Cinder{} + err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: glance.CinderName}, cinder) + if err != nil { + if errors.IsNotFound(err) { + // Request object not found, can't run GlanceAPI with this config + r.Log.Info("Cinder resource not found. Waiting for it to be deployed") + return ctrl.Result{RequeueAfter: time.Duration(10) * time.Second}, nil + } + } + // Cinder CR is found, we can unblock glance deployment because + // it represents a valid backend. + privileged = true + case backendToken[1] == "rbd": + // enable image conversion by default + r.Log.Info("Ceph config detected: enable image conversion by default") + imageConv = true + } + } + + // Generate service config + err = r.generateServiceConfig(ctx, helper, instance, &configVars, imageConv) if err != nil { instance.Status.Conditions.Set(condition.FalseCondition( condition.ServiceConfigReadyCondition, @@ -650,28 +689,6 @@ func (r *GlanceAPIReconciler) reconcileNormal(ctx context.Context, instance *gla // TODO check when/if Init, Update, or Upgrade should/could be skipped // - // Get Enabled backends from customServiceConfig and run pre backend conditions - availableBackends := glancev1.GetEnabledBackends(instance.Spec.CustomServiceConfig) - // iterate over availableBackends for backend specific cases - for i := 0; i < len(availableBackends); i++ { - backendToken := strings.SplitN(availableBackends[i], ":", 2) - switch { - case backendToken[1] == "cinder": - cinder := &cinderv1.Cinder{} - err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: glance.CinderName}, cinder) - if err != nil { - if errors.IsNotFound(err) { - // Request object not found, can't run GlanceAPI with this config - r.Log.Info("Cinder resource not found. Waiting for it to be deployed") - return ctrl.Result{RequeueAfter: time.Duration(10) * time.Second}, nil - } - } - // Cinder CR is found, we can unblock glance deployment because - // it represents a valid backend. - privileged = true - } - } - var serviceAnnotations map[string]string // networks to attach to serviceAnnotations, ctrlResult, err = ensureNAD(ctx, &instance.Status.Conditions, instance.Spec.NetworkAttachments, helper) @@ -713,6 +730,7 @@ func (r *GlanceAPIReconciler) reconcileNormal(ctx context.Context, instance *gla GetServiceLabels(instance), serviceAnnotations, privileged, + imageConv, ) if err != nil { return ctrlResult, err @@ -824,6 +842,7 @@ func (r *GlanceAPIReconciler) generateServiceConfig( h *helper.Helper, instance *glancev1.GlanceAPI, envVars *map[string]env.Setter, + imageConv bool, ) error { labels := labels.GetLabels(instance, labels.GetGroupLabel(glance.ServiceName), GetServiceLabels(instance)) @@ -905,9 +924,10 @@ func (r *GlanceAPIReconciler) generateServiceConfig( // If Quota values are defined in the top level spec (they are global values), // each GlanceAPI instance should build the config file according to // https://docs.openstack.org/glance/latest/admin/quotas.html - "QuotaEnabled": instance.Spec.Quota, - "LogFile": fmt.Sprintf("%s%s.log", glance.GlanceLogPath, instance.Name), - "VHosts": httpdVhostConfig, + "QuotaEnabled": instance.Spec.Quota, + "ImageConversion": imageConv, + "LogFile": fmt.Sprintf("%s%s.log", glance.GlanceLogPath, instance.Name), + "VHosts": httpdVhostConfig, } // Configure the internal GlanceAPI to provide image location data, and the @@ -970,6 +990,28 @@ func (r *GlanceAPIReconciler) createHashOfInputHashes( return hash, changed, nil } +// createHashOfBackendConfig - It creates an Hash of the current "enabledBackend" +// string, and it's set in the .Status.Hash of the current GlanceAPI. +// If a backend is added or removed, we're able to attach a new PVC for an existing +// API by recreating the StateFulSet through the glanceAPIRefresh function. This +// function helps to figure out if the glanceAPIRefresh should be triggered or not +func (r *GlanceAPIReconciler) createHashOfBackendConfig( + instance *glancev1.GlanceAPI, + backends []string, +) (string, bool, error) { + var hashMap map[string]string + changed := false + hash, err := util.ObjectHash(backends) + if err != nil { + return hash, changed, err + } + if hashMap, changed = util.SetHash(instance.Status.Hash, "backendHash", hash); changed { + instance.Status.Hash = hashMap + r.Log.Info(fmt.Sprintf("Backend hash %s - %s", "backendHash", hash)) + } + return hash, changed, nil +} + // ensureKeystoneEndpoints - create or update keystone endpoints func (r *GlanceAPIReconciler) ensureKeystoneEndpoints( ctx context.Context, @@ -1147,7 +1189,7 @@ func (r *GlanceAPIReconciler) deleteImageCacheJob( return ctrlResult, err } -// delete an imageCache cronJob no longer used +// deleteJob - delete an imageCache cronJob no longer used func (r *GlanceAPIReconciler) deleteJob( ctx context.Context, instance *glancev1.GlanceAPI, @@ -1178,3 +1220,28 @@ func (r *GlanceAPIReconciler) deleteJob( } return ctrlResult, err } + +// glanceAPIRefresh - delete a StateFulSet when a configuration for a Forbidden +// parameter happens: it might be required if we add / remove a backend (including +// ceph) where imageConversion is enabled and a dedicated PVC is created using +// statefulsets volume templates +func (r *GlanceAPIReconciler) glanceAPIRefresh( + ctx context.Context, + h *helper.Helper, + instance *glancev1.GlanceAPI, +) error { + sts, err := statefulset.GetStatefulSetWithName(ctx, h, fmt.Sprintf("%s-api", instance.Name), instance.Namespace) + if err != nil { + if errors.IsNotFound(err) { + // Request object not found + r.Log.Info(fmt.Sprintf("GlanceAPI %s-api: Statefulset not found.", instance.Name)) + return nil + } + } + err = r.Client.Delete(ctx, sts) + if err != nil && !k8s_errors.IsNotFound(err) { + err = fmt.Errorf("Error deleting %s: %w", instance.Name, err) + return err + } + return nil +} diff --git a/pkg/glance/const.go b/pkg/glance/const.go index e06c7149..d3d93550 100644 --- a/pkg/glance/const.go +++ b/pkg/glance/const.go @@ -43,6 +43,9 @@ const ( PvcLocal PvcType = "local" // PvcCache is used to define a PVC mounted for image caching purposes PvcCache PvcType = "cache" + // PvcImageConv is used to define a PVC mounted for image conversion purposes + // when Ceph is detected as a backend + PvcImageConv PvcType = "imageConv" // GlancePublicPort - GlancePublicPort int32 = 9292 diff --git a/pkg/glance/pvc.go b/pkg/glance/pvc.go index b36cb1c1..46111b4e 100644 --- a/pkg/glance/pvc.go +++ b/pkg/glance/pvc.go @@ -18,13 +18,21 @@ func GetPvc(api *glancev1.GlanceAPI, labels map[string]string, pvcType PvcType) pvcName := ServiceName pvcAnnotation := map[string]string{} - if pvcType == PvcCache { + switch { + case pvcType == PvcCache: pvcAnnotation["image-cache"] = "true" requestSize = api.Spec.GlanceAPITemplate.ImageCache.Size // append -cache to avoid confusion when listing PVCs pvcName = fmt.Sprintf("%s-cache", ServiceName) + case pvcType == PvcImageConv: + pvcAnnotation["image-conversion"] = "true" + requestSize = api.Spec.StorageRequest + // append -conversion to avoid confusion when listing PVCs + pvcName = fmt.Sprintf("%s-conversion", ServiceName) + default: + pvcName = ServiceName + requestSize = api.Spec.StorageRequest } - // Build the basic pvc object pvc := corev1.PersistentVolumeClaim{ ObjectMeta: metav1.ObjectMeta{ @@ -34,7 +42,6 @@ func GetPvc(api *glancev1.GlanceAPI, labels map[string]string, pvcType PvcType) Annotations: pvcAnnotation, }, } - // If the StorageRequest is a wrong string, we must return // an error. MustParse can't be used in this context as it // generates panic() and we can't recover the operator. diff --git a/pkg/glance/volumes.go b/pkg/glance/volumes.go index 2c36ee0c..617a2b54 100644 --- a/pkg/glance/volumes.go +++ b/pkg/glance/volumes.go @@ -291,6 +291,17 @@ func GetCacheVolumeMount() []corev1.VolumeMount { } } +// GetImageConvVolumeMount - Return the VolumeMount used for image conversion +func GetImageConvVolumeMount() []corev1.VolumeMount { + return []corev1.VolumeMount{ + { + Name: "glance-conversion", + MountPath: "/var/lib/glance/os_glance_staging_store", + ReadOnly: false, + }, + } +} + // GetScriptVolume - func GetScriptVolume() []corev1.Volume { var scriptsVolumeDefaultMode int32 = 0755 diff --git a/pkg/glanceapi/statefulset.go b/pkg/glanceapi/statefulset.go index 3771b4af..53ddcf04 100644 --- a/pkg/glanceapi/statefulset.go +++ b/pkg/glanceapi/statefulset.go @@ -48,6 +48,7 @@ func StatefulSet( labels map[string]string, annotations map[string]string, privileged bool, + imageConv bool, ) (*appsv1.StatefulSet, error) { runAsUser := int64(0) var config0644AccessMode int32 = 0644 @@ -138,6 +139,11 @@ func StatefulSet( if len(instance.Spec.ImageCache.Size) > 0 { apiVolumeMounts = append(apiVolumeMounts, glance.GetCacheVolumeMount()...) } + // If Ceph has been set as a backend for this GlanceAPI, build and append + // an imageConv PVC + if imageConv && instance.Spec.APIType == glancev1.APIExternal { + apiVolumeMounts = append(apiVolumeMounts, glance.GetImageConvVolumeMount()...) + } extraVolPropagation := append(glance.GlanceAPIPropagation, storage.PropagationType(glance.GetGlanceAPIName(instance.Name))) @@ -288,6 +294,15 @@ func StatefulSet( } statefulset.Spec.VolumeClaimTemplates = append(statefulset.Spec.VolumeClaimTemplates, cachePvc) } + // If Ceph is defined as a backend, each Pod should have its own imageConv RWO PVC + if imageConv && instance.Spec.APIType == glancev1.APIExternal { + imgConvPvc, err := glance.GetPvc(instance, labels, glance.PvcImageConv) + if err != nil { + return statefulset, err + } + statefulset.Spec.VolumeClaimTemplates = append(statefulset.Spec.VolumeClaimTemplates, imgConvPvc) + } + statefulset.Spec.Template.Spec.Volumes = append(glance.GetVolumes( instance.Name, glance.ServiceName, diff --git a/templates/common/config/00-config.conf b/templates/common/config/00-config.conf index c3108f59..5c8da53a 100644 --- a/templates/common/config/00-config.conf +++ b/templates/common/config/00-config.conf @@ -112,8 +112,14 @@ endpoint_id = {{ .EndpointID }} region_name = {{ .Region }} {{ end }} -[image_import_opts] -image_import_plugins = ['no_op'] - [os_brick] lock_path = /var/locks/openstack/os-brick + +[image_import_opts] +{{ if (index . "ImageConversion") }} +image_import_plugins = ['image_conversion'] +[image_conversion] +output_format = raw +{{ else }} +image_import_plugins = ['no_op'] +{{ end }}