Skip to content

Commit

Permalink
add metrics for the internal pd client calls
Browse files (browse the repository at this point in the history)
Signed-off-by: Ryan Leung <rleungx@gmail.com>
  • Loading branch information
rleungx committed Feb 7, 2025
1 parent 0c43ce5 commit 4dcadb4
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 3 deletions.
42 changes: 39 additions & 3 deletions client/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ func InitAndRegisterMetrics(constLabels prometheus.Labels) {
}

var (
cmdDuration *prometheus.HistogramVec
cmdFailedDuration *prometheus.HistogramVec
requestDuration *prometheus.HistogramVec
cmdDuration *prometheus.HistogramVec
cmdFailedDuration *prometheus.HistogramVec
internalCmdDuration *prometheus.HistogramVec
internalCmdFailedDuration *prometheus.HistogramVec
requestDuration *prometheus.HistogramVec

// TSOBestBatchSize is the histogram of the best batch size of TSO requests.
TSOBestBatchSize prometheus.Histogram
Expand Down Expand Up @@ -105,6 +107,26 @@ func initMetrics(constLabels prometheus.Labels) {
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13),
}, []string{"type"})

internalCmdDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "pd_client",
Subsystem: "internal_cmd",
Name: "handle_cmds_duration_seconds",
Help: "Bucketed histogram of processing time (s) of handled success internal cmds.",
ConstLabels: constLabels,
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13),
}, []string{"type"})

internalCmdFailedDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "pd_client",
Subsystem: "internal_cmd",
Name: "handle_failed_cmds_duration_seconds",
Help: "Bucketed histogram of processing time (s) of failed handled internal cmds.",
ConstLabels: constLabels,
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13),
}, []string{"type"})

requestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "pd_client",
Expand Down Expand Up @@ -228,6 +250,12 @@ var (
CmdFailedDurationUpdateGCSafePointV2 prometheus.Observer
CmdFailedDurationUpdateServiceSafePointV2 prometheus.Observer

// InternalCmdDurationGetClusterInfo observes durations for the "get_cluster_info"
// series of the internal command duration histogram.
InternalCmdDurationGetClusterInfo prometheus.Observer
// InternalCmdDurationGetMembers observes durations for the "get_members"
// series of the internal command duration histogram.
InternalCmdDurationGetMembers prometheus.Observer

// InternalCmdFailedDurationGetClusterInfo observes durations for the "get_cluster_info"
// series of the failed internal command duration histogram.
InternalCmdFailedDurationGetClusterInfo prometheus.Observer
// InternalCmdFailedDurationGetMembers observes durations for the "get_members"
// series of the failed internal command duration histogram.
InternalCmdFailedDurationGetMembers prometheus.Observer

// RequestDurationTSO records the durations of the successful TSO requests.
RequestDurationTSO prometheus.Observer
// RequestFailedDurationTSO records the durations of the failed TSO requests.
Expand Down Expand Up @@ -281,13 +309,21 @@ func initCmdDurations() {
CmdFailedDurationUpdateGCSafePointV2 = cmdFailedDuration.WithLabelValues("update_gc_safe_point_v2")
CmdFailedDurationUpdateServiceSafePointV2 = cmdFailedDuration.WithLabelValues("update_service_safe_point_v2")

InternalCmdDurationGetClusterInfo = internalCmdDuration.WithLabelValues("get_cluster_info")
InternalCmdDurationGetMembers = internalCmdDuration.WithLabelValues("get_members")

InternalCmdFailedDurationGetClusterInfo = internalCmdFailedDuration.WithLabelValues("get_cluster_info")
InternalCmdFailedDurationGetMembers = internalCmdFailedDuration.WithLabelValues("get_members")

RequestDurationTSO = requestDuration.WithLabelValues("tso")
RequestFailedDurationTSO = requestDuration.WithLabelValues("tso-failed")
}

func registerMetrics() {
prometheus.MustRegister(cmdDuration)
prometheus.MustRegister(cmdFailedDuration)
prometheus.MustRegister(internalCmdDuration)
prometheus.MustRegister(internalCmdFailedDuration)
prometheus.MustRegister(requestDuration)
prometheus.MustRegister(TSOBestBatchSize)
prometheus.MustRegister(TSOBatchSize)
Expand Down
9 changes: 9 additions & 0 deletions client/servicediscovery/service_discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (

"github.com/tikv/pd/client/constants"
"github.com/tikv/pd/client/errs"
"github.com/tikv/pd/client/metrics"
"github.com/tikv/pd/client/opt"
"github.com/tikv/pd/client/pkg/retry"
"github.com/tikv/pd/client/pkg/utils/grpcutil"
Expand Down Expand Up @@ -909,12 +910,16 @@ func (c *serviceDiscovery) getClusterInfo(ctx context.Context, url string, timeo
if err != nil {
return nil, err
}
// Time the GetClusterInfo RPC from this point on.
start := time.Now()
// NOTE(review): this deferred observation runs on every return after this line,
// including the error returns below that also observe
// InternalCmdFailedDurationGetClusterInfo. A failed call is therefore recorded
// in both histograms, although the help text of the internal_cmd duration metric
// describes it as covering successful commands — confirm whether that is intended.
defer func() { metrics.InternalCmdDurationGetClusterInfo.Observe(time.Since(start).Seconds()) }()
clusterInfo, err := pdpb.NewPDClient(cc).GetClusterInfo(ctx, &pdpb.GetClusterInfoRequest{})
if err != nil {
metrics.InternalCmdFailedDurationGetClusterInfo.Observe(time.Since(start).Seconds())
attachErr := errors.Errorf("error:%s target:%s status:%s", err, cc.Target(), cc.GetState().String())
return nil, errs.ErrClientGetClusterInfo.Wrap(attachErr).GenWithStackByCause()
}
if clusterInfo.GetHeader().GetError() != nil {
metrics.InternalCmdFailedDurationGetClusterInfo.Observe(time.Since(start).Seconds())
attachErr := errors.Errorf("error:%s target:%s status:%s", clusterInfo.GetHeader().GetError().String(), cc.Target(), cc.GetState().String())
return nil, errs.ErrClientGetClusterInfo.Wrap(attachErr).GenWithStackByCause()
}
Expand All @@ -928,12 +933,16 @@ func (c *serviceDiscovery) getMembers(ctx context.Context, url string, timeout t
if err != nil {
return nil, err
}
// Time the GetMembers RPC from this point on.
start := time.Now()
// NOTE(review): this deferred observation runs on every return after this line,
// including the error returns below that also observe
// InternalCmdFailedDurationGetMembers. A failed call is therefore recorded
// in both histograms, although the help text of the internal_cmd duration metric
// describes it as covering successful commands — confirm whether that is intended.
defer func() { metrics.InternalCmdDurationGetMembers.Observe(time.Since(start).Seconds()) }()
members, err := pdpb.NewPDClient(cc).GetMembers(ctx, &pdpb.GetMembersRequest{})
if err != nil {
metrics.InternalCmdFailedDurationGetMembers.Observe(time.Since(start).Seconds())
attachErr := errors.Errorf("error:%s target:%s status:%s", err, cc.Target(), cc.GetState().String())
return nil, errs.ErrClientGetMember.Wrap(attachErr).GenWithStackByCause()
}
if members.GetHeader().GetError() != nil {
metrics.InternalCmdFailedDurationGetMembers.Observe(time.Since(start).Seconds())
attachErr := errors.Errorf("error:%s target:%s status:%s", members.GetHeader().GetError().String(), cc.Target(), cc.GetState().String())
return nil, errs.ErrClientGetMember.Wrap(attachErr).GenWithStackByCause()
}
Expand Down

0 comments on commit 4dcadb4

Please sign in to comment.