Skip to content

Commit

Permalink
[rhythm] Add rollout-operator and allow simultaneous update of block-…
Browse files Browse the repository at this point in the history
…builder pods (#4660)

* Add rollout-operator

* Merge multi-zone operator and add more functionality

* Bump version

* Automatically configure enable_groups

* fmt
  • Loading branch information
mapno authored Feb 10, 2025
1 parent 161a5e9 commit 842da4a
Show file tree
Hide file tree
Showing 8 changed files with 364 additions and 66 deletions.
3 changes: 2 additions & 1 deletion example/tk/tempo-microservices/main.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ minio + metrics + load + tempo {
_config+:: {
cluster: 'k3d',
namespace: 'default',
block_builder_concurrent_rollout_enabled: true,
compactor+: {
},
querier+: {
Expand All @@ -41,7 +42,7 @@ minio + metrics + load + tempo {
pvc_storage_class: 'local-path',
},
block_builder+:{
replicas: 1,
replicas: 2,
},
memcached+: {
replicas: 1,
Expand Down
15 changes: 13 additions & 2 deletions operations/jsonnet/microservices/block-builder.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
$.util.readinessProbe +
(if $._config.variables_expansion then container.withArgsMixin(['-config.expand-env=true']) else {}),

tempo_block_builder_statefulset:
newBlockBuilderStatefulSet(concurrent_rollout_enabled=false, max_unavailable=1)::
statefulset.new(target_name, $._config.block_builder.replicas, $.tempo_block_builder_container, [], { app: target_name }) +
statefulset.mixin.spec.withServiceName(target_name) +
statefulset.spec.template.spec.securityContext.withFsGroup(10001) + // 10001 is the UID of the tempo user
Expand All @@ -47,7 +47,18 @@
volume.fromConfigMap(tempo_config_volume, $.tempo_block_builder_configmap.metadata.name),
volume.fromConfigMap(tempo_overrides_config_volume, $._config.overrides_configmap_name),
]) +
statefulset.mixin.spec.withPodManagementPolicy('Parallel'),
statefulset.mixin.spec.withPodManagementPolicy('Parallel') +
(
if !concurrent_rollout_enabled then {} else
statefulset.mixin.spec.selector.withMatchLabels({ name: 'block-builder', 'rollout-group': 'block-builder' }) +
statefulset.mixin.spec.updateStrategy.withType('OnDelete') +
statefulset.mixin.metadata.withLabelsMixin({ 'rollout-group': 'block-builder' }) +
statefulset.mixin.metadata.withAnnotationsMixin({ 'rollout-max-unavailable': std.toString(max_unavailable) }) +
statefulset.mixin.spec.template.metadata.withLabelsMixin({ 'rollout-group': 'block-builder' })
),

tempo_block_builder_statefulset:
$.newBlockBuilderStatefulSet($._config.block_builder_concurrent_rollout_enabled, $._config.block_builder_max_unavailable),

// Configmap

Expand Down
9 changes: 8 additions & 1 deletion operations/jsonnet/microservices/config.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
tempo: 'grafana/tempo:latest',
tempo_query: 'grafana/tempo-query:latest',
tempo_vulture: 'grafana/tempo-vulture:latest',
rollout_operator: 'grafana/rollout-operator:v0.1.1',
rollout_operator: 'grafana/rollout-operator:v0.23.0',
memcached: 'memcached:1.6.32-alpine',
memcachedExporter: 'prom/memcached-exporter:v0.14.3',
},
Expand All @@ -18,6 +18,13 @@
node_selector: null,
ingester_allow_multiple_replicas_on_same_node: false,

// Enable concurrent rollout of block-builder through the usage of the rollout operator.
// This feature changes the block-builder StatefulSet in ways that cannot be applied to an existing StatefulSet, so if one already exists it has to be deleted and re-applied for the feature to take effect.
block_builder_concurrent_rollout_enabled: false,
// Maximum number of unavailable replicas during a block-builder rollout when using block_builder_concurrent_rollout_enabled feature.
// Computed from block-builder replicas by default, but can also be specified as a percentage, for example "25%".
block_builder_max_unavailable: $.tempo_block_builder_statefulset.spec.replicas,

// disable tempo-query by default
tempo_query: {
enabled: false,
Expand Down
63 changes: 1 addition & 62 deletions operations/jsonnet/microservices/multi-zone.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -133,70 +133,9 @@
// Keep it if multi-zone is disabled.
if !$._config.multi_zone_ingester_enabled
then super.ingester_pdb
// We dont want Kubernetes to terminate any "ingester" StatefulSet's pod while migration is in progress.
// We don't want Kubernetes to terminate any "ingester" StatefulSet's pod while migration is in progress.
else if $._config.multi_zone_ingester_migration_enabled
then super.ingester_pdb + podDisruptionBudget.mixin.spec.withMaxUnavailable(0)
// Remove it if multi-zone is enabled and no migration is in progress.
else null,

//
// Rollout operator.
//

local rollout_operator_enabled = $._config.multi_zone_ingester_enabled,

rollout_operator_args:: {
'kubernetes.namespace': $._config.namespace,
},

rollout_operator_container::
container.new('rollout-operator', $._images.rollout_operator) +
container.withArgsMixin($.util.mapToFlags($.rollout_operator_args)) +
container.withPorts([
$.core.v1.containerPort.new('http-metrics', 8001),
]) +
$.util.resourcesRequests('100m', '100Mi') +
$.util.resourcesLimits('1', '200Mi') +
container.mixin.readinessProbe.httpGet.withPath('/ready') +
container.mixin.readinessProbe.httpGet.withPort(8001) +
container.mixin.readinessProbe.withInitialDelaySeconds(5) +
container.mixin.readinessProbe.withTimeoutSeconds(1),

rollout_operator_deployment: if !rollout_operator_enabled then null else
deployment.new('rollout-operator', 1, [$.rollout_operator_container]) +
deployment.mixin.metadata.withName('rollout-operator') +
deployment.mixin.spec.template.spec.withServiceAccountName('rollout-operator') +
// Ensure Kubernetes doesn't run 2 operators at the same time.
deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) +
deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1),

rollout_operator_role: if !rollout_operator_enabled then null else
role.new('rollout-operator-role') +
role.mixin.metadata.withNamespace($._config.namespace) +
role.withRulesMixin([
policyRule.withApiGroups('') +
policyRule.withResources(['pods']) +
policyRule.withVerbs(['list', 'get', 'watch', 'delete']),
policyRule.withApiGroups('apps') +
policyRule.withResources(['statefulsets']) +
policyRule.withVerbs(['list', 'get', 'watch']),
policyRule.withApiGroups('apps') +
policyRule.withResources(['statefulsets/status']) +
policyRule.withVerbs(['update']),
]),

rollout_operator_rolebinding: if !rollout_operator_enabled then null else
roleBinding.new('rollout-operator-rolebinding') +
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withKind('Role') +
roleBinding.mixin.roleRef.withName('rollout-operator-role') +
roleBinding.withSubjectsMixin({
kind: 'ServiceAccount',
name: 'rollout-operator',
namespace: $._config.namespace,
}),

rollout_operator_service_account: if !rollout_operator_enabled then null else
serviceAccount.new('rollout-operator'),
}
32 changes: 32 additions & 0 deletions operations/jsonnet/microservices/replica-template.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
  _config+: {
    // Set to true to render the ReplicaTemplate CustomResourceDefinition (see replica_template_custom_resource).
    replica_template_custom_resource_definition_enabled: false,
  },

  // Hidden field holding the parsed contents of replica-templates.yaml.
  replica_template:: std.parseYaml(importstr 'replica-templates.yaml'),

  // Visible copy of replica_template; evaluates to null unless enabled through _config.
  replica_template_custom_resource:
    if $._config.replica_template_custom_resource_definition_enabled
    then $.replica_template
    else null,

  // replicaTemplate builds a ReplicaTemplate resource in the configured namespace.
  //
  //   name:           metadata.name of the resource.
  //   replicas:       written to spec.replicas when > 0; when <= 0 the spec.replicas field is hidden.
  //   label_selector: written to spec.labelSelector. A syntactically valid label selector is required, and may be
  //                   used by the HorizontalPodAutoscaler controller when the ReplicaTemplate is the scaled resource,
  //                   depending on the metric target type. (With targetType=AverageValue the label selector is not
  //                   used for the scaling computation.)
  replicaTemplate(name, replicas, label_selector)::
    local replicasField =
      if replicas > 0
      then { replicas: replicas }
      else { replicas:: null };  // Hidden field: keeps spec.replicas out of the rendered resource.

    {
      apiVersion: 'rollout-operator.grafana.com/v1',
      kind: 'ReplicaTemplate',
      metadata: {
        name: name,
        namespace: $._config.namespace,
      },
      // HPA requires that the label selector exists and is valid, even though it is not used for a target type of
      // AverageValue.
      spec: { labelSelector: label_selector } + replicasField,
    },
}
52 changes: 52 additions & 0 deletions operations/jsonnet/microservices/replica-templates.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# CustomResourceDefinition for the namespaced ReplicaTemplate resource
# (group rollout-operator.grafana.com, version v1).
# A ReplicaTemplate carries a replica count (spec.replicas / status.replicas) and a
# label selector string, and exposes them through the "scale" subresource.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  # must be in the form: <plural>.<group>
  name: replicatemplates.rollout-operator.grafana.com
spec:
  group: rollout-operator.grafana.com
  versions:
    - name: v1
      served: true
      storage: true
      # Extra columns shown for this resource in table output.
      additionalPrinterColumns:
        - description: Status replicas
          jsonPath: .status.replicas
          name: StatusReplicas
          type: string
        - description: Spec replicas
          jsonPath: .spec.replicas
          name: SpecReplicas
          type: string
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                replicas:
                  type: integer
                  default: 1
                  minimum: 0
                labelSelector:
                  type: string
            status:
              type: object
              properties:
                replicas:
                  type: integer
      subresources:
        status: { }
        # The scale subresource maps the generic scale fields onto this resource's own fields.
        scale:
          specReplicasPath: .spec.replicas
          statusReplicasPath: .status.replicas
          labelSelectorPath: .spec.labelSelector
  scope: Namespaced
  names:
    plural: replicatemplates
    singular: replicatemplate
    kind: ReplicaTemplate
    categories:
      # Include in "kubectl get all" output
      - all
Loading

0 comments on commit 842da4a

Please sign in to comment.