diff --git a/Makefile b/Makefile index 9e638b9e3..971e023d9 100644 --- a/Makefile +++ b/Makefile @@ -60,6 +60,7 @@ uninstall: manifests ## Uninstall CRDs, rbac deploy: ## Deploy operator yq eval '(.spec.template.spec.containers[] | select(.name=="percona-server-mongodb-operator")).image = "$(IMAGE)"' $(DEPLOYDIR)/operator.yaml \ | yq eval '(.spec.template.spec.containers[] | select(.name=="percona-server-mongodb-operator").env[] | select(.name=="LOG_LEVEL")).value="DEBUG"' - \ + | yq eval '(.spec.template.spec.containers[] | select(.name=="percona-server-mongodb-operator").env[] | select(.name=="DISABLE_TELEMETRY")).value="true"' - \ | kubectl apply -f - undeploy: ## Undeploy operator diff --git a/config/crd/bases/psmdb.percona.com_perconaservermongodbs.yaml b/config/crd/bases/psmdb.percona.com_perconaservermongodbs.yaml index dd9ec3ee0..444936a19 100644 --- a/config/crd/bases/psmdb.percona.com_perconaservermongodbs.yaml +++ b/config/crd/bases/psmdb.percona.com_perconaservermongodbs.yaml @@ -18849,14 +18849,16 @@ spec: initialized: type: boolean members: - items: + additionalProperties: properties: name: type: string - version: + state: + type: integer + stateStr: type: string type: object - type: array + type: object message: type: string ready: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index cd4b75baf..3cfbc255b 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -19545,14 +19545,16 @@ spec: initialized: type: boolean members: - items: + additionalProperties: properties: name: type: string - version: + state: + type: integer + stateStr: type: string type: object - type: array + type: object message: type: string ready: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index dcbf4998d..693bb2209 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -19545,14 +19545,16 @@ spec: initialized: type: boolean members: - items: + additionalProperties: properties: name: type: string - version: + state: + type: integer + stateStr: type: string type: object - type: array + type: object message: type: string ready: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index b622192e6..ced254310 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -19545,14 +19545,16 @@ spec: initialized: type: boolean members: - items: + additionalProperties: properties: name: type: string - version: + state: + type: integer + stateStr: type: string type: object - type: array + type: object message: type: string ready: diff --git a/e2e-tests/default-cr/run b/e2e-tests/default-cr/run index a0c80c3b7..b2511f0dc 100755 --- a/e2e-tests/default-cr/run +++ b/e2e-tests/default-cr/run @@ -43,24 +43,34 @@ function start_cluster() { } function main() { - create_infra "$namespace" - cluster="my-cluster-name" - - desc 'create secrets and start client' - kubectl_bin apply -f $deploy_dir/secrets.yaml - kubectl_bin apply -f $conf_dir/client.yml - - desc "create first PSMDB cluster $cluster" + delete_crd + check_crd_for_deletion "${GIT_BRANCH}" kubectl_bin apply ${OPERATOR_NS:+-n $OPERATOR_NS} --server-side --force-conflicts -f $deploy_dir/crd.yaml + if [ -n "$OPERATOR_NS" ]; then + create_namespace $OPERATOR_NS + create_namespace ${namespace} apply_rbac cw-rbac - kubectl_bin apply -n ${OPERATOR_NS} -f $deploy_dir/cw-operator.yaml + yq eval ' + ((.. | select(.[] == "DISABLE_TELEMETRY")) |= .value="true") | + ((.. | select(.[] == "LOG_LEVEL")) |= .value="DEBUG")' ${src_dir}/deploy/cw-operator.yaml \ + | kubectl_bin apply -f - else + create_namespace ${namespace} apply_rbac rbac - yq eval '((.. | select(.[] == "DISABLE_TELEMETRY")) |= .value="true")' "$deploy_dir/operator.yaml" \ + yq eval ' + ((.. | select(.[] == "DISABLE_TELEMETRY")) |= .value="true") | + ((.. | select(.[] == "LOG_LEVEL")) |= .value="DEBUG")' ${src_dir}/deploy/operator.yaml \ | kubectl_bin apply -f - fi + cluster="my-cluster-name" + + desc 'create secrets and start client' + kubectl_bin apply -f $deploy_dir/secrets.yaml + kubectl_bin apply -f $conf_dir/client.yml + + desc "create first PSMDB cluster $cluster" yq eval '.spec.upgradeOptions.versionServiceEndpoint = "https://check-dev.percona.com" | .spec.replsets[].affinity.antiAffinityTopologyKey = "none" | .spec.replsets[].nonvoting.affinity.antiAffinityTopologyKey = "none" | diff --git a/e2e-tests/preinit-updates/conf/some-name.yml b/e2e-tests/preinit-updates/conf/some-name.yml new file mode 100644 index 000000000..9b0b27f0e --- /dev/null +++ b/e2e-tests/preinit-updates/conf/some-name.yml @@ -0,0 +1,55 @@ +apiVersion: psmdb.percona.com/v1 +kind: PerconaServerMongoDB +metadata: + name: some-name +spec: + #platform: openshift + image: + imagePullPolicy: Always + allowUnsafeConfigurations: false + updateStrategy: SmartUpdate + backup: + enabled: false + image: perconalab/percona-server-mongodb-operator:0.4.0-backup + replsets: + - name: rs0 + # readinessDelaySec: 40 + # livenessDelaySec: 120 + affinity: + antiAffinityTopologyKey: none + resources: + limits: + cpu: 500m + memory: 0.5G + requests: + cpu: 100m + memory: 0.1G + configuration: | + operationProfiling: + mode: slowOp + slowOpThresholdMs: 100 + security: + enableEncryption: true + redactClientLogData: false + setParameter: + ttlMonitorSleepSecs: 60 + wiredTigerConcurrentReadTransactions: 128 + wiredTigerConcurrentWriteTransactions: 128 + storage: + engine: wiredTiger + wiredTiger: + collectionConfig: + blockCompressor: snappy + engineConfig: + directoryForIndexes: false + journalCompressor: snappy + indexConfig: + prefixCompression: true + volumeSpec: + persistentVolumeClaim: + resources: + requests: + storage: 1Gi + size: 3 + secrets: + users: some-users diff --git a/e2e-tests/preinit-updates/run b/e2e-tests/preinit-updates/run new file mode 100755 index 000000000..c52530afe --- /dev/null +++ b/e2e-tests/preinit-updates/run @@ -0,0 +1,28 @@ +#!/bin/bash + +set -o errexit + +test_dir=$(realpath $(dirname $0)) +. ${test_dir}/../functions +set_debug + +create_infra ${namespace} + +cluster="some-name" +desc "test starts on cluster: ${cluster}" + +apply_cluster ${test_dir}/conf/${cluster}.yml +wait_for_running ${cluster}-rs0 1 "false" +echo "enabling backups" +kubectl_bin patch psmdb ${cluster} --type=merge -p '{"spec":{"backup":{"enabled":true}}}' +echo "sleeping for 7 seconds..." +sleep 7 +echo "changing rs0 resources" +kubectl_bin patch psmdb ${cluster} --type=json -p '[{"op":"replace","path":"/spec/replsets/0/resources/limits/cpu","value":"550m"}]' + +wait_for_running ${cluster}-rs0 3 +wait_cluster_consistency "${cluster}" + +desc 'test passed' + +destroy $namespace diff --git a/e2e-tests/run-pr.csv b/e2e-tests/run-pr.csv index fe55ed9ff..1c60d0407 100644 --- a/e2e-tests/run-pr.csv +++ b/e2e-tests/run-pr.csv @@ -31,6 +31,7 @@ operator-self-healing-chaos pitr pitr-sharded pitr-physical +preinit-updates pvc-resize recover-no-primary replset-overrides diff --git a/e2e-tests/run-release.csv b/e2e-tests/run-release.csv index 8165ad96b..ad56cdda8 100644 --- a/e2e-tests/run-release.csv +++ b/e2e-tests/run-release.csv @@ -32,6 +32,7 @@ operator-self-healing-chaos pitr pitr-sharded pitr-physical +preinit-updates pvc-resize recover-no-primary replset-overrides diff --git a/e2e-tests/version-service/conf/crd.yaml b/e2e-tests/version-service/conf/crd.yaml index dcbf4998d..693bb2209 100644 --- a/e2e-tests/version-service/conf/crd.yaml +++ b/e2e-tests/version-service/conf/crd.yaml @@ -19545,14 +19545,16 @@ spec: initialized: type: boolean members: - items: + additionalProperties: properties: name: type: string - version: + state: + type: integer + stateStr: type: string type: object - type: array + type: object message: type: string ready: diff --git a/pkg/apis/psmdb/v1/psmdb_types.go b/pkg/apis/psmdb/v1/psmdb_types.go index fb87dea62..5c1e8c834 100644 --- a/pkg/apis/psmdb/v1/psmdb_types.go +++ b/pkg/apis/psmdb/v1/psmdb_types.go @@ -28,6 +28,7 @@ import ( "github.com/percona/percona-backup-mongodb/pbm/defs" "github.com/percona/percona-server-mongodb-operator/pkg/mcs" + "github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo" "github.com/percona/percona-server-mongodb-operator/pkg/util/numstr" "github.com/percona/percona-server-mongodb-operator/version" ) @@ -234,8 +235,9 @@ type UpgradeOptions struct { } type ReplsetMemberStatus struct { - Name string `json:"name,omitempty"` - Version string `json:"version,omitempty"` + Name string `json:"name,omitempty"` + State mongo.MemberState `json:"state,omitempty"` + StateStr string `json:"stateStr,omitempty"` } type MongosStatus struct { @@ -246,8 +248,8 @@ type MongosStatus struct { } type ReplsetStatus struct { - Members []*ReplsetMemberStatus `json:"members,omitempty"` - ClusterRole ClusterRole `json:"clusterRole,omitempty"` + Members map[string]ReplsetMemberStatus `json:"members,omitempty"` + ClusterRole ClusterRole `json:"clusterRole,omitempty"` Initialized bool `json:"initialized,omitempty"` AddedAsShard *bool `json:"added_as_shard,omitempty"` diff --git a/pkg/apis/psmdb/v1/zz_generated.deepcopy.go b/pkg/apis/psmdb/v1/zz_generated.deepcopy.go index d77f13d4d..873fe159b 100644 --- a/pkg/apis/psmdb/v1/zz_generated.deepcopy.go +++ b/pkg/apis/psmdb/v1/zz_generated.deepcopy.go @@ -1633,13 +1633,9 @@ func (in *ReplsetStatus) DeepCopyInto(out *ReplsetStatus) { *out = *in if in.Members != nil { in, out := &in.Members, &out.Members - *out = make([]*ReplsetMemberStatus, len(*in)) - for i := range *in { - if (*in)[i] != nil { - in, out := &(*in)[i], &(*out)[i] - *out = new(ReplsetMemberStatus) - **out = **in - } + *out = make(map[string]ReplsetMemberStatus, len(*in)) + for key, val := range *in { + (*out)[key] = val } } if in.AddedAsShard != nil { diff --git a/pkg/controller/perconaservermongodb/finalizers.go b/pkg/controller/perconaservermongodb/finalizers.go index 9eb072fec..544d475da 100644 --- a/pkg/controller/perconaservermongodb/finalizers.go +++ b/pkg/controller/perconaservermongodb/finalizers.go @@ -120,7 +120,7 @@ func (r *ReconcilePerconaServerMongoDB) deletePSMDBPods(ctx context.Context, cr rsDeleted = false switch err { case errWaitingTermination, errWaitingFirstPrimary: - log.Error(err, "rs", rs.Name) + log.Info("deleting rs pods", "rs", rs.Name, "status", err.Error()) continue default: log.Error(err, "failed to delete rs pods", "rs", rs.Name) diff --git a/pkg/controller/perconaservermongodb/mgo.go b/pkg/controller/perconaservermongodb/mgo.go index be0cc6d6b..677650cea 100644 --- a/pkg/controller/perconaservermongodb/mgo.go +++ b/pkg/controller/perconaservermongodb/mgo.go @@ -27,7 +27,7 @@ import ( var errReplsetLimit = fmt.Errorf("maximum replset member (%d) count reached", mongo.MaxMembers) -func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr *api.PerconaServerMongoDB, replset *api.ReplsetSpec, mongosPods []corev1.Pod) (api.AppState, error) { +func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr *api.PerconaServerMongoDB, replset *api.ReplsetSpec, mongosPods []corev1.Pod) (api.AppState, map[string]api.ReplsetMemberStatus, error) { log := logf.FromContext(ctx) replsetSize := replset.Size @@ -42,20 +42,20 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr restoreInProgress, err := r.restoreInProgress(ctx, cr, replset) if err != nil { - return api.AppStateError, errors.Wrap(err, "check if restore in progress") + return api.AppStateError, nil, errors.Wrap(err, "check if restore in progress") } if restoreInProgress { - return api.AppStateInit, nil + return api.AppStateInit, nil, nil } if replsetSize == 0 { - return api.AppStateReady, nil + return api.AppStateReady, nil, nil } pods, err := psmdb.GetRSPods(ctx, r.client, cr, replset.Name) if err != nil { - return api.AppStateInit, errors.Wrap(err, "failed to get replset pods") + return api.AppStateInit, nil, errors.Wrap(err, "failed to get replset pods") } // all pods needs to be scheduled to reconcile @@ -63,33 +63,33 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr for _, pod := range pods.Items { for _, containerStatus := range pod.Status.ContainerStatuses { if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason == "CrashLoopBackOff" { - return api.AppStateError, errors.Errorf("pod %s is in CrashLoopBackOff state", pod.Name) + return api.AppStateError, nil, errors.Errorf("pod %s is in CrashLoopBackOff state", pod.Name) } } } log.Info("Waiting for the pods", "replset", replset.Name, "size", replsetSize, "pods", len(pods.Items)) - return api.AppStateInit, nil + return api.AppStateInit, nil, nil } if cr.MCSEnabled() { seList, err := psmdb.GetExportedServices(ctx, r.client, cr) if err != nil { - return api.AppStateError, errors.Wrap(err, "get exported services") + return api.AppStateError, nil, errors.Wrap(err, "get exported services") } if len(seList.Items) == 0 { log.Info("waiting for service exports") - return api.AppStateInit, nil + return api.AppStateInit, nil, nil } for _, se := range seList.Items { imported, err := psmdb.IsServiceImported(ctx, r.client, cr, se.Name) if err != nil { - return api.AppStateError, errors.Wrapf(err, "check if service is imported for %s", se.Name) + return api.AppStateError, nil, errors.Wrapf(err, "check if service is imported for %s", se.Name) } if !imported { log.Info("waiting for service import", "replset", replset.Name, "serviceExport", se.Name) - return api.AppStateInit, nil + return api.AppStateInit, nil, nil } } } @@ -97,7 +97,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr cli, err := r.mongoClientWithRole(ctx, cr, replset, api.RoleClusterAdmin) if err != nil { if cr.Spec.Unmanaged { - return api.AppStateInit, nil + return api.AppStateInit, nil, nil } if cr.Status.Replsets[replset.Name].Initialized { if errors.Is(err, topology.ErrServerSelectionTimeout) && strings.Contains(err.Error(), "ReplicaSetNoPrimary") { @@ -105,27 +105,28 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr err := r.handleReplicaSetNoPrimary(ctx, cr, replset, pods.Items) if err != nil { - return api.AppStateError, errors.Wrap(err, "handle ReplicaSetNoPrimary") + return api.AppStateError, nil, errors.Wrap(err, "handle ReplicaSetNoPrimary") } - return api.AppStateError, nil + return api.AppStateError, nil, nil } - return api.AppStateError, errors.Wrap(err, "dial") + return api.AppStateError, nil, errors.Wrap(err, "dial") } - err := r.handleReplsetInit(ctx, cr, replset, pods.Items) + pod, primary, err := r.handleReplsetInit(ctx, cr, replset, pods.Items) if err != nil { - return api.AppStateInit, errors.Wrap(err, "handleReplsetInit") + return api.AppStateInit, nil, errors.Wrap(err, "handleReplsetInit") } err = r.createOrUpdateSystemUsers(ctx, cr, replset) if err != nil { - return api.AppStateInit, errors.Wrap(err, "create system users") + return api.AppStateInit, nil, errors.Wrap(err, "create system users") } rs := cr.Status.Replsets[replset.Name] rs.Initialized = true + rs.Members = map[string]api.ReplsetMemberStatus{pod.Name: *primary} cr.Status.Replsets[replset.Name] = rs cr.Status.AddCondition(api.ClusterCondition{ @@ -135,7 +136,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr LastTransitionTime: metav1.NewTime(time.Now()), }) - return api.AppStateInit, nil + return api.AppStateInit, rs.Members, nil } defer func() { if err := cli.Disconnect(ctx); err != nil { @@ -146,16 +147,16 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr if cr.Spec.Unmanaged { status, err := cli.RSStatus(ctx) if err != nil { - return api.AppStateError, errors.Wrap(err, "failed to get rs status") + return api.AppStateError, nil, errors.Wrap(err, "failed to get rs status") } if status.Primary() == nil { - return api.AppStateInit, nil + return api.AppStateInit, nil, nil } - return api.AppStateReady, nil + return api.AppStateReady, nil, nil } err = r.createOrUpdateSystemUsers(ctx, cr, replset) if err != nil { - return api.AppStateInit, errors.Wrap(err, "create system users") + return api.AppStateInit, nil, errors.Wrap(err, "create system users") } // this can happen if cluster is initialized but status update failed @@ -171,12 +172,12 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr LastTransitionTime: metav1.NewTime(time.Now()), }) - return api.AppStateInit, nil + return api.AppStateInit, nil, nil } rstRunning, err := r.isRestoreRunning(ctx, cr) if err != nil { - return api.AppStateInit, errors.Wrap(err, "failed to check running restore") + return api.AppStateInit, nil, errors.Wrap(err, "failed to check running restore") } if cr.Spec.Sharding.Enabled && @@ -190,7 +191,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr mongosSession, err := r.mongosClientWithRole(ctx, cr, api.RoleClusterAdmin) if err != nil { - return api.AppStateError, errors.Wrap(err, "failed to get mongos connection") + return api.AppStateError, nil, errors.Wrap(err, "failed to get mongos connection") } defer func() { @@ -203,7 +204,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr err = mongosSession.SetDefaultRWConcern(ctx, mongo.DefaultReadConcern, mongo.DefaultWriteConcern) // SetDefaultRWConcern introduced in MongoDB 4.4 if err != nil && !strings.Contains(err.Error(), "CommandNotFound") { - return api.AppStateError, errors.Wrap(err, "set default RW concern") + return api.AppStateError, nil, errors.Wrap(err, "set default RW concern") } rsName := replset.Name @@ -214,14 +215,14 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr in, err := inShard(ctx, mongosSession, rsName) if err != nil { - return api.AppStateError, errors.Wrap(err, "get shard") + return api.AppStateError, nil, errors.Wrap(err, "get shard") } if !in { log.Info("adding rs to shard", "rs", rsName) err := r.handleRsAddToShard(ctx, cr, replset, pods.Items[0], mongosPods[0]) if err != nil { - return api.AppStateError, errors.Wrap(err, "add shard") + return api.AppStateError, nil, errors.Wrap(err, "add shard") } log.Info("added to shard", "rs", rsName) @@ -238,22 +239,22 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr err := cli.SetDefaultRWConcern(ctx, mongo.DefaultReadConcern, mongo.DefaultWriteConcern) // SetDefaultRWConcern introduced in MongoDB 4.4 if err != nil && !strings.Contains(err.Error(), "CommandNotFound") { - return api.AppStateError, errors.Wrap(err, "set default RW concern") + return api.AppStateError, nil, errors.Wrap(err, "set default RW concern") } } - membersLive, err := r.updateConfigMembers(ctx, cli, cr, replset) + rsMembers, liveMembers, err := r.updateConfigMembers(ctx, cli, cr, replset) if err != nil { - return api.AppStateError, errors.Wrap(err, "failed to update config members") + return api.AppStateError, nil, errors.Wrap(err, "failed to update config members") } - if membersLive == len(pods.Items) { - return api.AppStateReady, nil + if liveMembers == len(pods.Items) { + return api.AppStateReady, rsMembers, nil } - log.V(1).Info("Replset is not ready", "liveMembers", membersLive, "pods", len(pods.Items)) + log.V(1).Info("Replset is not ready", "liveMembers", liveMembers, "pods", len(pods.Items)) - return api.AppStateInit, nil + return api.AppStateInit, rsMembers, nil } func (r *ReconcilePerconaServerMongoDB) getConfigMemberForPod(ctx context.Context, cr *api.PerconaServerMongoDB, rs *api.ReplsetSpec, id int, pod *corev1.Pod) (mongo.ConfigMember, error) { @@ -364,10 +365,11 @@ func (r *ReconcilePerconaServerMongoDB) getConfigMemberForExternalNode(id int, e return member } -func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, cli mongo.Client, cr *api.PerconaServerMongoDB, rs *api.ReplsetSpec) (int, error) { +func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, cli mongo.Client, cr *api.PerconaServerMongoDB, rs *api.ReplsetSpec) (map[string]api.ReplsetMemberStatus, int, error) { log := logf.FromContext(ctx) // Primary with a Secondary and an Arbiter (PSA) unsafePSA := false + rsMembers := make(map[string]api.ReplsetMemberStatus) if cr.CompareVersion("1.15.0") <= 0 { unsafePSA = cr.Spec.UnsafeConf && rs.Arbiter.Enabled && rs.Arbiter.Size == 1 && !rs.NonVoting.Enabled && rs.Size == 2 @@ -377,17 +379,17 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, pods, err := psmdb.GetRSPods(ctx, r.client, cr, rs.Name) if err != nil { - return 0, errors.Wrap(err, "get rs pods") + return rsMembers, 0, errors.Wrap(err, "get rs pods") } cnf, err := cli.ReadConfig(ctx) if err != nil { - return 0, errors.Wrap(err, "get replset config") + return rsMembers, 0, errors.Wrap(err, "get replset config") } rsStatus, err := cli.RSStatus(ctx) if err != nil { - return 0, errors.Wrap(err, "get replset status") + return rsMembers, 0, errors.Wrap(err, "get replset status") } members := mongo.ConfigMembers{} @@ -399,7 +401,7 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, member, err := r.getConfigMemberForPod(ctx, cr, rs, key, &pod) if err != nil { - return 0, errors.Wrapf(err, "get config member for pod %s", pod.Name) + return rsMembers, 0, errors.Wrapf(err, "get config member for pod %s", pod.Name) } members = append(members, member) @@ -424,7 +426,7 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, if member.State == mongo.MemberStatePrimary { log.Info("Stepping down the primary", "member", member.Name) if err := cli.StepDown(ctx, 60, false); err != nil { - return 0, errors.Wrap(err, "step down primary") + return rsMembers, 0, errors.Wrap(err, "step down primary") } } @@ -433,10 +435,10 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, log.Info("Fixing hostname of member", "replset", rs.Name, "id", member.Id, "member", member.Name) if err := cli.WriteConfig(ctx, cnf, false); err != nil { - return 0, errors.Wrap(err, "fix member hostname: write mongo config") + return rsMembers, 0, errors.Wrap(err, "fix member hostname: write mongo config") } - return 0, nil + return rsMembers, 0, nil } if cnf.Members.FixMemberConfigs(ctx, members) { @@ -445,7 +447,7 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, log.Info("Fixing member configurations", "replset", rs.Name) if err := cli.WriteConfig(ctx, cnf, false); err != nil { - return 0, errors.Wrap(err, "fix member configurations: write mongo config") + return rsMembers, 0, errors.Wrap(err, "fix member configurations: write mongo config") } } @@ -456,7 +458,7 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, err = cli.WriteConfig(ctx, cnf, false) if err != nil { - return 0, errors.Wrap(err, "delete: write mongo config") + return rsMembers, 0, errors.Wrap(err, "delete: write mongo config") } } @@ -467,7 +469,7 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, err = cli.WriteConfig(ctx, cnf, false) if err != nil { - return 0, errors.Wrap(err, "add new: write mongo config") + return rsMembers, 0, errors.Wrap(err, "add new: write mongo config") } } @@ -478,7 +480,7 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, err = cli.WriteConfig(ctx, cnf, false) if err != nil { - return 0, errors.Wrap(err, "update external nodes: write mongo config") + return rsMembers, 0, errors.Wrap(err, "update external nodes: write mongo config") } } @@ -491,44 +493,45 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context, err := cli.WriteConfig(ctx, cnf, false) if err != nil { - return 0, errors.Wrap(err, "set votes: write mongo config") + return rsMembers, 0, errors.Wrap(err, "set votes: write mongo config") } } rsStatus, err = cli.RSStatus(ctx) if err != nil { - return 0, errors.Wrap(err, "unable to get replset members") + return rsMembers, 0, errors.Wrap(err, "unable to get replset members") } - membersLive := 0 + liveMembers := 0 for _, member := range rsStatus.Members { var tags mongo.ReplsetTags + for i := range cnf.Members { if member.Id == cnf.Members[i].ID { tags = cnf.Members[i].Tags break } } + if _, ok := tags["external"]; ok { continue } + if podName, ok := tags["podName"]; ok { + rsMembers[podName] = api.ReplsetMemberStatus{ + Name: member.Name, + State: member.State, + StateStr: member.StateStr, + } + } + switch member.State { case mongo.MemberStatePrimary, mongo.MemberStateSecondary, mongo.MemberStateArbiter: - membersLive++ - case mongo.MemberStateStartup, - mongo.MemberStateStartup2, - mongo.MemberStateRecovering, - mongo.MemberStateRollback, - mongo.MemberStateDown, - mongo.MemberStateUnknown: - - return 0, nil - default: - return 0, errors.Errorf("undefined state of the replset member %s: %v", member.Name, member.State) + liveMembers++ } } - return membersLive, nil + + return rsMembers, liveMembers, nil } func inShard(ctx context.Context, client mongo.Client, rsName string) (bool, error) { @@ -639,7 +642,7 @@ func (r *ReconcilePerconaServerMongoDB) handleRsAddToShard(ctx context.Context, // This must be ran from within the running container to utilize the MongoDB Localhost Exception. // // See: https://www.mongodb.com/docs/manual/core/localhost-exception/ -func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, cr *api.PerconaServerMongoDB, replset *api.ReplsetSpec, pods []corev1.Pod) error { +func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, cr *api.PerconaServerMongoDB, replset *api.ReplsetSpec, pods []corev1.Pod) (*corev1.Pod, *api.ReplsetMemberStatus, error) { log := logf.FromContext(ctx) for _, pod := range pods { @@ -657,19 +660,19 @@ func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, c member, err := r.getConfigMemberForPod(ctx, cr, replset, 0, &pod) if err != nil { - return errors.Wrapf(err, "get config member for pod %s", pod.Name) + return nil, nil, errors.Wrapf(err, "get config member for pod %s", pod.Name) } memberBytes, err := json.Marshal(member) if err != nil { - return errors.Wrap(err, "marshall member to json") + return nil, nil, errors.Wrap(err, "marshall member to json") } var errb, outb bytes.Buffer err = r.clientcmd.Exec(ctx, &pod, "mongod", []string{"mongod", "--version"}, nil, &outb, &errb, false) if err != nil { - return fmt.Errorf("exec --version: %v / %s / %s", err, outb.String(), errb.String()) + return nil, nil, fmt.Errorf("exec --version: %v / %s / %s", err, outb.String(), errb.String()) } mongoCmd := "mongosh" @@ -703,7 +706,7 @@ func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, c outb.Reset() err = r.clientcmd.Exec(ctx, &pod, "mongod", cmd, nil, &outb, &errb, false) if err != nil { - return fmt.Errorf("exec rs.initiate: %v / %s / %s", err, outb.String(), errb.String()) + return nil, nil, fmt.Errorf("exec rs.initiate: %v / %s / %s", err, outb.String(), errb.String()) } log.Info("replset initialized", "replset", replsetName, "pod", pod.Name) @@ -712,7 +715,7 @@ func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, c log.Info("creating user admin", "replset", replsetName, "pod", pod.Name, "user", api.RoleUserAdmin) userAdmin, err := getInternalCredentials(ctx, r.client, cr, api.RoleUserAdmin) if err != nil { - return errors.Wrap(err, "failed to get userAdmin credentials") + return nil, nil, errors.Wrap(err, "failed to get userAdmin credentials") } cmd[2] = fmt.Sprintf(`%s --eval %s`, mongoCmd, mongoInitAdminUser(userAdmin.Username, userAdmin.Password)) @@ -720,14 +723,18 @@ func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, c outb.Reset() err = r.clientcmd.Exec(ctx, &pod, "mongod", cmd, nil, &outb, &errb, false) if err != nil { - return fmt.Errorf("exec add admin user: %v / %s / %s", err, outb.String(), errb.String()) + return nil, nil, fmt.Errorf("exec add admin user: %v / %s / %s", err, outb.String(), errb.String()) } log.Info("user admin created", "replset", replsetName, "pod", pod.Name, "user", api.RoleUserAdmin) - return nil + return &pod, &api.ReplsetMemberStatus{ + Name: member.Host, + State: mongo.MemberStatePrimary, + StateStr: mongo.MemberStateStrings[mongo.MemberStatePrimary], + }, nil } - return errNoRunningMongodContainers + return nil, nil, errNoRunningMongodContainers } func (r *ReconcilePerconaServerMongoDB) handleReplicaSetNoPrimary(ctx context.Context, cr *api.PerconaServerMongoDB, replset *api.ReplsetSpec, pods []corev1.Pod) error { diff --git a/pkg/controller/perconaservermongodb/psmdb_controller.go b/pkg/controller/perconaservermongodb/psmdb_controller.go index b735f3af4..fed64c05d 100644 --- a/pkg/controller/perconaservermongodb/psmdb_controller.go +++ b/pkg/controller/perconaservermongodb/psmdb_controller.go @@ -546,7 +546,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcileReplsets(ctx context.Context, c var errs []error clusterStatus := api.AppStateNone for _, replset := range repls { - replsetStatus, err := r.reconcileCluster(ctx, cr, replset, mongosPods.Items) + replsetStatus, members, err := r.reconcileCluster(ctx, cr, replset, mongosPods.Items) if err != nil { log.Error(err, "failed to reconcile cluster", "replset", replset.Name) errs = append(errs, err) @@ -565,6 +565,15 @@ func (r *ReconcilePerconaServerMongoDB) reconcileReplsets(ctx context.Context, c break } } + + if rs, ok := cr.Status.Replsets[replset.Name]; ok { + rs.Members = make(map[string]api.ReplsetMemberStatus) + for pod, member := range members { + rs.Members[pod] = member + } + log.V(1).Info("Replset members", "rs", replset.Name, "initialized", rs.Initialized, "members", rs.Members) + cr.Status.Replsets[replset.Name] = rs + } } return clusterStatus, stderrors.Join(errs...) } @@ -650,7 +659,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcilePause(ctx context.Context, cr * for _, rs := range cr.Spec.Replsets { if cr.Status.State == api.AppStateStopping { - log.Info("Pausing cluster", "replset", rs.Name) + log.Info("pausing cluster", "replset", rs.Name) } rs.Arbiter.Enabled = false rs.NonVoting.Enabled = false @@ -658,7 +667,6 @@ func (r *ReconcilePerconaServerMongoDB) reconcilePause(ctx context.Context, cr * if err := r.deletePSMDBPods(ctx, cr); err != nil { if err == errWaitingTermination { - log.Info("pausing cluster", "error", err.Error()) return nil } return errors.Wrap(err, "delete psmdb pods") diff --git a/pkg/controller/perconaservermongodb/smart.go b/pkg/controller/perconaservermongodb/smart.go index 3ef22c411..8fbeecd4b 100644 --- a/pkg/controller/perconaservermongodb/smart.go +++ b/pkg/controller/perconaservermongodb/smart.go @@ -26,7 +26,8 @@ import ( func (r *ReconcilePerconaServerMongoDB) smartUpdate(ctx context.Context, cr *api.PerconaServerMongoDB, sfs *appsv1.StatefulSet, replset *api.ReplsetSpec, ) error { - log := logf.FromContext(ctx) + log := logf.FromContext(ctx).WithName("SmartUpdate").WithValues("statefulset", sfs.Name, "replset", replset.Name) + if replset.Size == 0 { return nil } @@ -85,7 +86,7 @@ func (r *ReconcilePerconaServerMongoDB) smartUpdate(ctx context.Context, cr *api } } - log.Info("StatefulSet is changed, starting smart update", "name", sfs.Name) + log.Info("StatefulSet is changed, starting smart update") if sfs.Status.ReadyReplicas < sfs.Status.Replicas { log.Info("can't start/continue 'SmartUpdate': waiting for all replicas are ready") @@ -112,7 +113,8 @@ func (r *ReconcilePerconaServerMongoDB) smartUpdate(ctx context.Context, cr *api } if rsStatus, ok := cr.Status.Replsets[replset.Name]; !ok || !rsStatus.Initialized { - log.Info("Replset wasn't initialized. Continuing smart update", "replset", replset.Name) + log.Info("replset wasn't initialized. Continuing smart update") + for _, pod := range list.Items { log.Info("apply changes to pod", "pod", pod.Name) @@ -120,10 +122,26 @@ func (r *ReconcilePerconaServerMongoDB) smartUpdate(ctx context.Context, cr *api return err } } - log.Info("smart update finished for statefulset", "statefulset", sfs.Name) + + log.Info("smart update finished for statefulset") + return nil } + if rsStatus, ok := cr.Status.Replsets[replset.Name]; ok { + for _, pod := range list.Items { + if _, ok := rsStatus.Members[pod.Name]; !ok { + log.Info("pod is not a member of replset, updating it", "pod", pod.Name) + + if err := updatePod(&pod); err != nil { + return err + } + + return nil + } + } + } + isBackupRunning, err := r.isBackupRunning(ctx, cr) if err != nil { return errors.Wrap(err, "failed to check active backups") @@ -167,6 +185,7 @@ func (r *ReconcilePerconaServerMongoDB) smartUpdate(ctx context.Context, cr *api } if isPrimary { primaryPod = pod + log.Info("primary pod detected", "pod", pod.Name) continue } @@ -212,7 +231,7 @@ func (r *ReconcilePerconaServerMongoDB) smartUpdate(ctx context.Context, cr *api } } - log.Info("smart update finished for statefulset", "statefulset", sfs.Name) + log.Info("smart update finished for statefulset") return nil } diff --git a/pkg/controller/perconaservermongodb/status.go b/pkg/controller/perconaservermongodb/status.go index 1209e4abc..a28d61ba7 100644 --- a/pkg/controller/perconaservermongodb/status.go +++ b/pkg/controller/perconaservermongodb/status.go @@ -93,6 +93,7 @@ func (r *ReconcilePerconaServerMongoDB) updateStatus(ctx context.Context, cr *ap currentRSstatus = api.ReplsetStatus{} } + status.Members = currentRSstatus.Members status.Initialized = currentRSstatus.Initialized status.AddedAsShard = currentRSstatus.AddedAsShard