From 1f8826f2c32a5790cb80c0a9496f10a13ecb8da4 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 13 Jun 2024 11:58:12 +0800 Subject: [PATCH] Let server TSO handle duration include failed requests Signed-off-by: JmPotato --- metrics/grafana/pd.json | 50 +++++++++++++++++++++-------------------- server/grpc_service.go | 2 +- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 7965a341f6c..abfe049b905 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -10633,20 +10633,13 @@ "refId": "C", "step": 2 }, - { - "expr": "histogram_quantile(0.99999, sum(rate(pd_server_handle_tso_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99.999% tso", - "refId": "D" - }, { "expr": "histogram_quantile(0.90, sum(rate(tso_server_handle_tso_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "90% tso", - "refId": "E", + "refId": "D", "step": 2 }, { @@ -10655,7 +10648,7 @@ "hide": false, "intervalFactor": 2, "legendFormat": "99% tso", - "refId": "F", + "refId": "E", "step": 2 }, { @@ -10664,22 +10657,15 @@ "hide": false, "intervalFactor": 2, "legendFormat": "99.9% tso", - "refId": "G", + "refId": "F", "step": 2 - }, - { - "expr": "histogram_quantile(0.99999, sum(rate(tso_server_handle_tso_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99.999% tso", - "refId": "H" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD server TSO handle time", + "title": "PD server TSO handle duration", "tooltip": { "msResolution": false, "shared": true, @@ -10766,26 +10752,42 @@ "steppedLine": 
false, "targets": [ { - "expr": "histogram_quantile(0.98, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", - "hide": false, + "expr": "avg(rate(pd_client_request_handle_requests_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type) / avg(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type)", "intervalFactor": 2, - "legendFormat": "{{type}} 98th percentile", + "legendFormat": "avg {{type}}", "refId": "A", "step": 2 }, { - "expr": "avg(rate(pd_client_request_handle_requests_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type) / avg(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type)", + "expr": "histogram_quantile(0.90, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{type}} average", + "legendFormat": "90% {{type}}", "refId": "B", "step": 2 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "99% {{type}}", + "refId": "C", + "step": 2 + }, + { + "expr": "histogram_quantile(0.999, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "99.9% {{type}}", + "refId": "D", + "step": 2 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Handle requests 
duration", + "title": "PD client requests handle duration", "tooltip": { "msResolution": false, "shared": true, diff --git a/server/grpc_service.go b/server/grpc_service.go index acfc87fcf71..5bc1f110965 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -578,10 +578,10 @@ func (s *GrpcServer) Tso(stream pdpb.PD_TsoServer) error { ctx, task := trace.NewTask(ctx, "tso") ts, err := s.tsoAllocatorManager.HandleRequest(ctx, request.GetDcLocation(), count) task.End() + tsoHandleDuration.Observe(time.Since(start).Seconds()) if err != nil { return status.Errorf(codes.Unknown, err.Error()) } - tsoHandleDuration.Observe(time.Since(start).Seconds()) response := &pdpb.TsoResponse{ Header: s.header(), Timestamp: &ts,