adjust log levels to make production debug reliable (#307)
Co-authored-by: Ryan Zhang <zhangryan@microsoft.com>
ryanzhang-oss and Ryan Zhang authored Sep 23, 2022
1 parent b085bed commit 52c56b2
Showing 14 changed files with 95 additions and 95 deletions.
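For context on why these one-level shifts matter: klog only emits a V-leveled message when the process runs with a verbosity threshold (the -v flag) at or above that level, so anything logged at V(3)–V(5) is silently dropped in a deployment running at a modest verbosity such as --v=2, which the commit title suggests is the production setting here. A minimal standalone sketch of that gating (not code from this repository; the client ID and credential values are placeholders):

```go
package main

import (
	"flag"

	"k8s.io/klog/v2"
)

func main() {
	// klog registers its flags, including -v (the verbosity threshold), on the default flag set.
	klog.InitFlags(nil)
	_ = flag.Set("v", "2") // assume a production-style verbosity of 2
	flag.Parse()
	defer klog.Flush()

	// Emitted: level 2 is within the threshold.
	klog.V(2).InfoS("FetchToken", "client ID", "placeholder-client-id")

	// Dropped: level 5 exceeds the threshold, so this line never reaches the logs.
	klog.V(5).InfoS("GetToken start", "credential", "placeholder-credential")
}
```

Lowering a message's V number to 2, as this commit does across the call sites below, therefore makes it visible in production logs without restarting the controller at a debug verbosity.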
4 changes: 2 additions & 2 deletions pkg/authtoken/providers/azure/azure_msi.go
@@ -36,7 +36,7 @@ func (a *azureAuthTokenProvider) FetchToken(ctx context.Context) (interfaces.Aut
token := interfaces.AuthToken{}
opts := &azidentity.ManagedIdentityCredentialOptions{ID: azidentity.ClientID(a.clientID)}

klog.V(5).InfoS("FetchToken", "client ID", a.clientID)
klog.V(2).InfoS("FetchToken", "client ID", a.clientID)
credential, err := azidentity.NewManagedIdentityCredential(opts)
if err != nil {
return token, errors.Wrap(err, "failed to create managed identity cred")
@@ -46,7 +46,7 @@ func (a *azureAuthTokenProvider) FetchToken(ctx context.Context) (interfaces.Aut
func(err error) bool {
return ctx.Err() == nil
}, func() error {
klog.V(5).InfoS("GetToken start", "credential", credential)
klog.V(2).InfoS("GetToken start", "credential", credential)
azToken, err = credential.GetToken(ctx, policy.TokenRequestOptions{
Scopes: []string{aksScope},
})
2 changes: 1 addition & 1 deletion pkg/authtoken/providers/secret/k8s_secret.go
@@ -43,7 +43,7 @@ func New(secretName, namespace string) (interfaces.AuthTokenProvider, error) {
}

func (s *secretAuthTokenProvider) FetchToken(ctx context.Context) (interfaces.AuthToken, error) {
klog.V(3).InfoS("fetching token from secret", "secret", klog.KRef(s.secretName, s.secretNamespace))
klog.V(2).InfoS("fetching token from secret", "secret", klog.KRef(s.secretName, s.secretNamespace))
token := interfaces.AuthToken{}
secret, err := s.fetchSecret(ctx)
if err != nil {
4 changes: 2 additions & 2 deletions pkg/authtoken/token_refresher.go
@@ -45,7 +45,7 @@ var (
)

func (at *Refresher) callFetchToken(ctx context.Context) (interfaces.AuthToken, error) {
klog.V(5).InfoS("FetchToken start")
klog.V(2).InfoS("FetchToken start")
deadline := time.Now().Add(DefaultRefreshDuration)
fetchTokenContext, cancel := context.WithDeadline(ctx, deadline)
defer cancel()
@@ -67,7 +67,7 @@ func (at *Refresher) RefreshToken(ctx context.Context) error {
continue
}

klog.V(5).InfoS("WriteToken start")
klog.V(2).InfoS("WriteToken start")
err = at.writer.WriteToken(token)
if err != nil {
klog.ErrorS(err, "Failed to WriteToken")
2 changes: 1 addition & 1 deletion pkg/authtoken/token_writer.go
@@ -55,6 +55,6 @@ func (w *Writer) WriteToken(token interfaces.AuthToken) error {
if err != nil {
return errors.Wrap(err, "cannot write the refresh token")
}
klog.V(3).InfoS("token has been saved to the file successfully")
klog.V(2).InfoS("token has been saved to the file successfully")
return nil
}
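Several of the touched call sites also rely on klog's structured helpers: InfoS takes alternating key/value pairs, while klog.KRef and klog.KObj render a namespace/name reference for an object. A small standalone sketch of those helpers, with illustrative names only (not taken from this repository):

```go
package main

import (
	"flag"

	"k8s.io/klog/v2"
)

// minimalObject stands in for a Kubernetes object; klog.KObj only needs the
// GetName/GetNamespace methods of the klog.KMetadata interface.
type minimalObject struct{ namespace, name string }

func (o minimalObject) GetName() string      { return o.name }
func (o minimalObject) GetNamespace() string { return o.namespace }

func main() {
	klog.InitFlags(nil)
	_ = flag.Set("v", "2")
	flag.Parse()
	defer klog.Flush()

	// KRef builds the reference from plain strings (namespace first, then name).
	klog.V(2).InfoS("fetching token from secret",
		"secret", klog.KRef("fleet-system", "hub-token-secret"))

	// KObj derives the same kind of reference from an object.
	placement := minimalObject{name: "example-placement"}
	klog.V(2).InfoS("processing placement", "placement", klog.KObj(placement))
}
```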
10 changes: 5 additions & 5 deletions pkg/controllers/clusterresourceplacement/cluster_selector.go
@@ -35,13 +35,13 @@ func (r *Reconciler) selectClusters(placement *fleetv1alpha1.ClusterResourcePlac
if err != nil {
return nil, err
}
klog.V(4).InfoS("we select all the available clusters in the fleet without a policy",
klog.V(2).InfoS("we select all the available clusters in the fleet without a policy",
"placement", placement.Name, "clusters", clusterNames)
return
}
// a fix list of clusters set
if len(placement.Spec.Policy.ClusterNames) != 0 {
klog.V(4).InfoS("use the cluster names provided as the list of cluster we select",
klog.V(2).InfoS("use the cluster names provided as the list of cluster we select",
"placement", placement.Name, "clusters", placement.Spec.Policy.ClusterNames)
clusterNames, err = r.getClusters(placement.Spec.Policy.ClusterNames)
if err != nil {
@@ -56,7 +56,7 @@ func (r *Reconciler) selectClusters(placement *fleetv1alpha1.ClusterResourcePlac
if err != nil {
return nil, err
}
klog.V(4).InfoS("we select all the available clusters in the fleet without a cluster affinity",
klog.V(2).InfoS("we select all the available clusters in the fleet without a cluster affinity",
"placement", placement.Name, "clusters", clusterNames)
return
}
@@ -71,13 +71,13 @@ func (r *Reconciler) selectClusters(placement *fleetv1alpha1.ClusterResourcePlac
if err != nil {
return nil, errors.Wrap(err, fmt.Sprintf("selector = %v", clusterSelector.LabelSelector))
}
klog.V(4).InfoS("selector matches some cluster", "clusterNum", len(matchClusters), "placement", placement.Name, "selector", clusterSelector.LabelSelector)
klog.V(2).InfoS("selector matches some cluster", "clusterNum", len(matchClusters), "placement", placement.Name, "selector", clusterSelector.LabelSelector)
for _, clusterName := range matchClusters {
selectedClusters[clusterName] = true
}
}
for cluster := range selectedClusters {
klog.V(4).InfoS("matched a cluster", "cluster", cluster, "placement", placement.Name)
klog.V(2).InfoS("matched a cluster", "cluster", cluster, "placement", placement.Name)
clusterNames = append(clusterNames, cluster)
}
return clusterNames, nil
22 changes: 11 additions & 11 deletions pkg/controllers/clusterresourceplacement/placement_controller.go
@@ -92,7 +92,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, key controller.QueueKey) (ct
return r.removeAllWorks(ctx, placementOld)
}

klog.V(3).InfoS("Successfully selected clusters", "placement", placementOld.Name, "number of clusters", len(selectedClusters))
klog.V(2).InfoS("Successfully selected clusters", "placement", placementOld.Name, "number of clusters", len(selectedClusters))

// select the new resources and record the result in the placementNew status
manifests, scheduleErr := r.selectResources(ctx, placementNew)
@@ -107,7 +107,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, key controller.QueueKey) (ct
klog.V(2).InfoS("No resources match the placement", "placement", placeRef)
return r.removeAllWorks(ctx, placementOld)
}
klog.V(3).InfoS("Successfully selected resources", "placement", placementOld.Name, "number of resources", len(manifests))
klog.V(2).InfoS("Successfully selected resources", "placement", placementOld.Name, "number of resources", len(manifests))

// persist union of the all the selected resources and clusters between placementNew and placementOld so that we won't
// get orphaned resource/cluster if the reconcile loops stops between work creation and the placement status persisted
@@ -118,7 +118,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, key controller.QueueKey) (ct
_ = r.Client.Status().Update(ctx, placementOld, client.FieldOwner(utils.PlacementFieldManagerName))
return ctrl.Result{}, scheduleErr
}
klog.V(3).InfoS("Successfully persisted the intermediate scheduling result", "placement", placementOld.Name,
klog.V(2).InfoS("Successfully persisted the intermediate scheduling result", "placement", placementOld.Name,
"totalClusters", totalCluster, "totalResources", totalResources)
// pick up the new version so placementNew can continue to update
placementNew.SetResourceVersion(placementOld.GetResourceVersion())
@@ -131,7 +131,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, key controller.QueueKey) (ct
_ = r.Client.Status().Update(ctx, placementOld, client.FieldOwner(utils.PlacementFieldManagerName))
return ctrl.Result{}, scheduleErr
}
klog.V(3).InfoS("Successfully scheduled work resources", "placement", placementOld.Name, "number of clusters", len(selectedClusters))
klog.V(2).InfoS("Successfully scheduled work resources", "placement", placementOld.Name, "number of clusters", len(selectedClusters))

// go through the existing cluster list and remove work from no longer scheduled clusters.
removed, scheduleErr := r.removeStaleWorks(ctx, placementNew.GetName(), placementOld.Status.TargetClusters, placementNew.Status.TargetClusters)
@@ -144,7 +144,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, key controller.QueueKey) (ct
_ = r.Client.Status().Update(ctx, placementOld, client.FieldOwner(utils.PlacementFieldManagerName))
return ctrl.Result{}, scheduleErr
}
klog.V(3).InfoS("Successfully removed work resources from previously selected clusters", "placement", placementOld.Name, "removed clusters", removed)
klog.V(2).InfoS("Successfully removed work resources from previously selected clusters", "placement", placementOld.Name, "removed clusters", removed)

// the schedule has succeeded, so we now can use the placementNew status that contains all the newly selected cluster and resources
r.updatePlacementScheduledCondition(placementNew, nil)
@@ -157,7 +157,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, key controller.QueueKey) (ct
_ = r.Client.Status().Update(ctx, placementNew, client.FieldOwner(utils.PlacementFieldManagerName))
return ctrl.Result{}, applyErr
}
klog.V(3).InfoS("Successfully collected work resources status from all selected clusters",
klog.V(2).InfoS("Successfully collected work resources status from all selected clusters",
"placement", placementOld.Name, "number of clusters", len(selectedClusters), "hasPending", hasPending,
"numberFailedPlacement", len(placementNew.Status.FailedResourcePlacements))

@@ -182,7 +182,7 @@ func (r *Reconciler) removeAllWorks(ctx context.Context, placement *fleetv1alpha
klog.ErrorS(removeErr, "failed to remove all the work resources from previously selected clusters", "placement", placeRef)
return ctrl.Result{}, removeErr
}
klog.V(3).InfoS("Successfully removed work resources from previously selected clusters",
klog.V(2).InfoS("Successfully removed work resources from previously selected clusters",
"placement", placeRef, "number of removed clusters", removed)
placement.Status.TargetClusters = nil
placement.Status.SelectedResources = nil
@@ -262,7 +262,7 @@ func (r *Reconciler) updatePlacementScheduledCondition(placement *fleetv1alpha1.
ObservedGeneration: placement.Generation,
})
if schedCond == nil || schedCond.Status != metav1.ConditionTrue {
klog.V(3).InfoS("successfully scheduled all selected resources to their clusters", "placement", placementRef)
klog.V(2).InfoS("successfully scheduled all selected resources to their clusters", "placement", placementRef)
r.Recorder.Event(placement, corev1.EventTypeNormal, "ResourceScheduled", "successfully scheduled all selected resources to their clusters")
}
} else {
Expand Down Expand Up @@ -293,7 +293,7 @@ func (r *Reconciler) updatePlacementAppliedCondition(placement *fleetv1alpha1.Cl
Message: "Successfully applied resources to member clusters",
ObservedGeneration: placement.Generation,
})
klog.V(3).InfoS("successfully applied all selected resources", "placement", placementRef)
klog.V(2).InfoS("successfully applied all selected resources", "placement", placementRef)
if preAppliedCond == nil || preAppliedCond.Status != metav1.ConditionTrue {
r.Recorder.Event(placement, corev1.EventTypeNormal, "ResourceApplied", "successfully applied all selected resources")
}
@@ -305,7 +305,7 @@ func (r *Reconciler) updatePlacementAppliedCondition(placement *fleetv1alpha1.Cl
Message: applyErr.Error(),
ObservedGeneration: placement.Generation,
})
klog.V(3).InfoS("Some selected resources are still waiting to be applied", "placement", placementRef)
klog.V(2).InfoS("Some selected resources are still waiting to be applied", "placement", placementRef)
if preAppliedCond == nil || preAppliedCond.Status == metav1.ConditionTrue {
r.Recorder.Event(placement, corev1.EventTypeWarning, "ResourceApplyPending", "Some applied resources are now waiting to be applied to the member cluster")
}
@@ -318,7 +318,7 @@ func (r *Reconciler) updatePlacementAppliedCondition(placement *fleetv1alpha1.Cl
Message: applyErr.Error(),
ObservedGeneration: placement.Generation,
})
klog.V(3).InfoS("failed to apply some selected resources", "placement", placementRef)
klog.V(2).InfoS("failed to apply some selected resources", "placement", placementRef)
if preAppliedCond == nil || preAppliedCond.Status != metav1.ConditionFalse {
r.Recorder.Event(placement, corev1.EventTypeWarning, "ResourceApplyFailed", "failed to apply some selected resources")
}
18 changes: 9 additions & 9 deletions pkg/controllers/clusterresourceplacement/resource_selector.go
@@ -47,7 +47,7 @@ func (r *Reconciler) selectResources(ctx context.Context, placement *fleetv1alph
Namespace: unstructuredObj.GetNamespace(),
}
placement.Status.SelectedResources = append(placement.Status.SelectedResources, res)
klog.V(4).InfoS("selected one resource ", "placement", placement.Name, "resource", res)
klog.V(2).InfoS("selected one resource ", "placement", placement.Name, "resource", res)
manifest, err := generateManifest(unstructuredObj)
if err != nil {
return nil, err
@@ -68,7 +68,7 @@ func (r *Reconciler) gatherSelectedResource(ctx context.Context, placement *flee
}

if r.DisabledResourceConfig.IsResourceDisabled(gvk) {
klog.V(4).InfoS("Skip select resource", "group version kind", gvk.String())
klog.V(2).InfoS("Skip select resource", "group version kind", gvk.String())
continue
}
var objs []runtime.Object
@@ -105,7 +105,7 @@ func (r *Reconciler) gatherSelectedResource(ctx context.Context, placement *flee

// fetchClusterScopedResources retrieve the objects based on the selector.
func (r *Reconciler) fetchClusterScopedResources(ctx context.Context, selector fleetv1alpha1.ClusterResourceSelector, placeName string) ([]runtime.Object, error) {
klog.V(4).InfoS("start to fetch the cluster scoped resources by the selector", "selector", selector)
klog.V(2).InfoS("start to fetch the cluster scoped resources by the selector", "selector", selector)
gk := schema.GroupKind{
Group: selector.Group,
Kind: selector.Kind,
@@ -138,7 +138,7 @@ func (r *Reconciler) fetchClusterScopedResources(ctx context.Context, selector f
uObj := obj.DeepCopyObject().(*unstructured.Unstructured)
if uObj.GetDeletionTimestamp() != nil {
// skip a to be deleted namespace
klog.V(4).InfoS("skip the deleting cluster scoped resources by the selector",
klog.V(2).InfoS("skip the deleting cluster scoped resources by the selector",
"selector", selector, "placeName", placeName, "resource name", uObj.GetName())
return []runtime.Object{}, nil
}
@@ -165,7 +165,7 @@ func (r *Reconciler) fetchClusterScopedResources(ctx context.Context, selector f
uObj := objects[i].DeepCopyObject().(*unstructured.Unstructured)
if uObj.GetDeletionTimestamp() != nil {
// skip a to be deleted namespace
klog.V(4).InfoS("skip the deleting cluster scoped resources by the selector",
klog.V(2).InfoS("skip the deleting cluster scoped resources by the selector",
"selector", selector, "placeName", placeName, "resource name", uObj.GetName())
continue
}
@@ -177,7 +177,7 @@ func (r *Reconciler) fetchClusterScopedResources(ctx context.Context, selector f

// fetchNamespaceResources retrieve all the objects for a ClusterResourceSelector that is for namespace.
func (r *Reconciler) fetchNamespaceResources(ctx context.Context, selector fleetv1alpha1.ClusterResourceSelector, placeName string) ([]runtime.Object, error) {
klog.V(4).InfoS("start to fetch the namespace resources by the selector", "selector", selector)
klog.V(2).InfoS("start to fetch the namespace resources by the selector", "selector", selector)
var resources []runtime.Object

if len(selector.Name) != 0 {
@@ -229,7 +229,7 @@ func (r *Reconciler) fetchAllResourcesInOneNamespace(ctx context.Context, namesp
return nil, errors.New(fmt.Sprintf("namespace %s is not allowed to propagate", namespaceName))
}

klog.V(4).InfoS("start to fetch all the resources inside a namespace", "namespace", namespaceName)
klog.V(2).InfoS("start to fetch all the resources inside a namespace", "namespace", namespaceName)
// select the namespace object itself
obj, err := r.InformerManager.Lister(utils.NamespaceGVR).Get(namespaceName)
if err != nil {
@@ -239,7 +239,7 @@ func (r *Reconciler) fetchAllResourcesInOneNamespace(ctx context.Context, namesp
nameSpaceObj := obj.DeepCopyObject().(*unstructured.Unstructured)
if nameSpaceObj.GetDeletionTimestamp() != nil {
// skip a to be deleted namespace
klog.V(4).InfoS("skip the deleting namespace resources by the selector",
klog.V(2).InfoS("skip the deleting namespace resources by the selector",
"placeName", placeName, "namespace", namespaceName)
return resources, nil
}
@@ -285,7 +285,7 @@ func (r *Reconciler) shouldSelectResource(gvr schema.GroupVersionResource) bool
}
for _, gvk := range gvks {
if r.DisabledResourceConfig.IsResourceDisabled(gvk) {
klog.V(4).InfoS("Skip watch resource", "group version kind", gvk.String())
klog.V(2).InfoS("Skip watch resource", "group version kind", gvk.String())
return false
}
}
16 changes: 8 additions & 8 deletions pkg/controllers/clusterresourceplacement/work_propagation.go
@@ -100,7 +100,7 @@ func (r *Reconciler) scheduleWork(ctx context.Context, placement *fleetv1alpha1.
}
existingHash := curWork.GetAnnotations()[SpecHashAnnotationKey]
if existingHash == specHash || reflect.DeepEqual(curWork.Spec.Workload.Manifests, workerSpec.Workload.Manifests) {
klog.V(4).InfoS("skip updating work spec as its identical",
klog.V(2).InfoS("skip updating work spec as its identical",
"member cluster namespace", memberClusterNsName, "work name", workName, "number of manifests", len(manifests))
continue
}
@@ -113,13 +113,13 @@ func (r *Reconciler) scheduleWork(ctx context.Context, placement *fleetv1alpha1.
allErr = append(allErr, errors.Wrap(updateErr, fmt.Sprintf("failed to update the work obj %s in namespace %s", workName, memberClusterNsName)))
continue
}
klog.V(3).InfoS("updated work spec with manifests",
klog.V(2).InfoS("updated work spec with manifests",
"member cluster namespace", memberClusterNsName, "work name", workName, "number of manifests", len(manifests))
}
if changed {
klog.V(2).InfoS("Applied all work to the selected cluster namespaces", "placement", klog.KObj(placement), "number of clusters", len(memberClusterNames))
} else {
klog.V(3).InfoS("Nothing new to apply for the cluster resource placement", "placement", klog.KObj(placement), "number of clusters", len(memberClusterNames))
klog.V(2).InfoS("Nothing new to apply for the cluster resource placement", "placement", klog.KObj(placement), "number of clusters", len(memberClusterNames))
}

return apiErrors.NewAggregate(allErr)
@@ -166,7 +166,7 @@ func (r *Reconciler) collectAllManifestsStatus(placement *fleetv1alpha1.ClusterR
work, err := r.getResourceBinding(memberClusterNsName, workName)
if err != nil {
if apierrors.IsNotFound(err) {
klog.V(3).InfoS("the work change has not shown up in the cache yet",
klog.V(2).InfoS("the work change has not shown up in the cache yet",
"work", klog.KRef(memberClusterNsName, workName), "cluster", cluster)
hasPending = true
continue
@@ -177,19 +177,19 @@ func (r *Reconciler) collectAllManifestsStatus(placement *fleetv1alpha1.ClusterR
appliedCond := meta.FindStatusCondition(work.Status.Conditions, workapi.ConditionTypeApplied)
if appliedCond == nil {
hasPending = true
klog.V(4).InfoS("the work is never picked up by the member cluster",
klog.V(2).InfoS("the work is never picked up by the member cluster",
"work", klog.KObj(work), "cluster", cluster)
continue
}
if appliedCond.ObservedGeneration < work.GetGeneration() {
hasPending = true
klog.V(4).InfoS("the update of the work is not picked up by the member cluster yet",
klog.V(2).InfoS("the update of the work is not picked up by the member cluster yet",
"work", klog.KObj(work), "cluster", cluster, "work generation", work.GetGeneration(),
"applied generation", appliedCond.ObservedGeneration)
continue
}
if appliedCond.Status == metav1.ConditionTrue {
klog.V(4).InfoS("the work is applied successfully by the member cluster",
klog.V(2).InfoS("the work is applied successfully by the member cluster",
"work", klog.KObj(work), "cluster", cluster)
continue
}
@@ -204,7 +204,7 @@ func (r *Reconciler) collectAllManifestsStatus(placement *fleetv1alpha1.ClusterR
appliedCond = meta.FindStatusCondition(manifestCondition.Conditions, workapi.ConditionTypeApplied)
// collect if there is an explicit fail
if appliedCond != nil && appliedCond.Status != metav1.ConditionTrue {
klog.V(3).InfoS("find a failed to apply manifest", "member cluster namespace", memberClusterNsName,
klog.V(2).InfoS("find a failed to apply manifest", "member cluster namespace", memberClusterNsName,
"manifest name", manifestCondition.Identifier.Name, "group", manifestCondition.Identifier.Group,
"version", manifestCondition.Identifier.Version, "kind", manifestCondition.Identifier.Kind)
placement.Status.FailedResourcePlacements = append(placement.Status.FailedResourcePlacements, fleetv1alpha1.FailedResourcePlacement{