From c4a424a5b31b0cd545b1f9c357c3ef938fd270ef Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Wed, 29 Jan 2025 13:27:20 +0000 Subject: [PATCH] add e2e metrics test case to ensure no any metrics missing in future PRs Signed-off-by: Benjamin Wang --- tests/e2e/metrics_test.go | 241 +++++++++++++++++++++++++++++++++ tests/framework/e2e/cluster.go | 4 + tests/go.mod | 2 +- 3 files changed, 246 insertions(+), 1 deletion(-) diff --git a/tests/e2e/metrics_test.go b/tests/e2e/metrics_test.go index ee3009632d0..8db2822838f 100644 --- a/tests/e2e/metrics_test.go +++ b/tests/e2e/metrics_test.go @@ -15,11 +15,17 @@ package e2e import ( + "bytes" "context" "fmt" + "io" + "net/http" + "net/url" "testing" "time" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" "github.com/stretchr/testify/require" "go.etcd.io/etcd/api/v3/version" @@ -126,3 +132,238 @@ func expectLearnerMetric(cx ctlCtx, procIdx int, expectMetric string) { args := e2e.CURLPrefixArgsCluster(cx.epc.Cfg, cx.epc.Procs[procIdx], "GET", e2e.CURLReq{Endpoint: "/metrics"}) require.NoError(cx.t, e2e.SpawnWithExpectsContext(ctx, args, nil, expect.ExpectedResponse{Value: expectMetric})) } + +func TestNoMetricsMissing(t *testing.T) { + var ( + // Note the list doesn't contain all the metrics, because the + // labelled metrics won't be exposed by prometheus by default. + // They are only exposed when at least one value with labels + // is set. + basicMetrics = []string{ + "etcd_cluster_version", + "etcd_debugging_auth_revision", + "etcd_debugging_disk_backend_commit_rebalance_duration_seconds", + "etcd_debugging_disk_backend_commit_spill_duration_seconds", + "etcd_debugging_disk_backend_commit_write_duration_seconds", + "etcd_debugging_lease_granted_total", + "etcd_debugging_lease_renewed_total", + "etcd_debugging_lease_revoked_total", + "etcd_debugging_lease_ttl_total", + "etcd_debugging_mvcc_compact_revision", + "etcd_debugging_mvcc_current_revision", + "etcd_debugging_mvcc_db_compaction_keys_total", + "etcd_debugging_mvcc_db_compaction_last", + "etcd_debugging_mvcc_db_compaction_pause_duration_milliseconds", + "etcd_debugging_mvcc_db_compaction_total_duration_milliseconds", + "etcd_debugging_mvcc_events_total", + "etcd_debugging_mvcc_index_compaction_pause_duration_milliseconds", + "etcd_debugging_mvcc_keys_total", + "etcd_debugging_mvcc_pending_events_total", + "etcd_debugging_mvcc_slow_watcher_total", + "etcd_debugging_mvcc_total_put_size_in_bytes", + "etcd_debugging_mvcc_watch_stream_total", + "etcd_debugging_mvcc_watcher_total", + "etcd_debugging_server_lease_expired_total", + "etcd_debugging_snap_save_marshalling_duration_seconds", + "etcd_debugging_snap_save_total_duration_seconds", + "etcd_debugging_store_expires_total", + "etcd_debugging_store_reads_total", + "etcd_debugging_store_watch_requests_total", + "etcd_debugging_store_watchers", + "etcd_debugging_store_writes_total", + "etcd_disk_backend_commit_duration_seconds", + "etcd_disk_backend_defrag_duration_seconds", + "etcd_disk_backend_snapshot_duration_seconds", + "etcd_disk_defrag_inflight", + "etcd_disk_wal_fsync_duration_seconds", + "etcd_disk_wal_write_bytes_total", + "etcd_disk_wal_write_duration_seconds", + "etcd_grpc_proxy_cache_hits_total", + "etcd_grpc_proxy_cache_keys_total", + "etcd_grpc_proxy_cache_misses_total", + "etcd_grpc_proxy_events_coalescing_total", + "etcd_grpc_proxy_watchers_coalescing_total", + "etcd_mvcc_db_open_read_transactions", + "etcd_mvcc_db_total_size_in_bytes", + "etcd_mvcc_db_total_size_in_use_in_bytes", + "etcd_mvcc_delete_total", + "etcd_mvcc_hash_duration_seconds", + "etcd_mvcc_hash_rev_duration_seconds", + "etcd_mvcc_put_total", + "etcd_mvcc_range_total", + "etcd_mvcc_txn_total", + "etcd_network_client_grpc_received_bytes_total", + "etcd_network_client_grpc_sent_bytes_total", + "etcd_network_known_peers", + "etcd_server_apply_duration_seconds", + "etcd_server_client_requests_total", + "etcd_server_go_version", + "etcd_server_has_leader", + "etcd_server_health_failures", + "etcd_server_health_success", + "etcd_server_heartbeat_send_failures_total", + "etcd_server_id", + "etcd_server_is_leader", + "etcd_server_is_learner", + "etcd_server_leader_changes_seen_total", + "etcd_server_learner_promote_successes", + "etcd_server_proposals_applied_total", + "etcd_server_proposals_committed_total", + "etcd_server_proposals_failed_total", + "etcd_server_proposals_pending", + "etcd_server_quota_backend_bytes", + "etcd_server_range_duration_seconds", + "etcd_server_read_indexes_failed_total", + "etcd_server_slow_apply_total", + "etcd_server_slow_read_indexes_total", + "etcd_server_snapshot_apply_in_progress_total", + "etcd_server_version", + "etcd_snap_db_fsync_duration_seconds", + "etcd_snap_db_save_total_duration_seconds", + "etcd_snap_fsync_duration_seconds", + "go_gc_duration_seconds", + "go_gc_gogc_percent", + "go_gc_gomemlimit_bytes", + "go_goroutines", + "go_info", + "go_memstats_alloc_bytes", + "go_memstats_alloc_bytes_total", + "go_memstats_buck_hash_sys_bytes", + "go_memstats_frees_total", + "go_memstats_gc_sys_bytes", + "go_memstats_heap_alloc_bytes", + "go_memstats_heap_idle_bytes", + "go_memstats_heap_inuse_bytes", + "go_memstats_heap_objects", + "go_memstats_heap_released_bytes", + "go_memstats_heap_sys_bytes", + "go_memstats_last_gc_time_seconds", + "go_memstats_mallocs_total", + "go_memstats_mcache_inuse_bytes", + "go_memstats_mcache_sys_bytes", + "go_memstats_mspan_inuse_bytes", + "go_memstats_mspan_sys_bytes", + "go_memstats_next_gc_bytes", + "go_memstats_other_sys_bytes", + "go_memstats_stack_inuse_bytes", + "go_memstats_stack_sys_bytes", + "go_memstats_sys_bytes", + "go_sched_gomaxprocs_threads", + "go_threads", + "grpc_server_handled_total", + "grpc_server_msg_received_total", + "grpc_server_msg_sent_total", + "grpc_server_started_total", + "os_fd_limit", + "os_fd_used", + "promhttp_metric_handler_requests_in_flight", + "promhttp_metric_handler_requests_total", + } + extraMultipleMemberClusterMetrics = []string{ + "etcd_network_active_peers", + "etcd_network_peer_received_bytes_total", + "etcd_network_peer_sent_bytes_total", + } + extraExtensiveMetrics = []string{"grpc_server_handling_seconds"} + ) + + testCases := []struct { + name string + options []e2e.EPClusterOption + expectedMetrics []string + }{ + { + name: "basic metrics of 1 member cluster", + options: []e2e.EPClusterOption{ + e2e.WithClusterSize(1), + }, + expectedMetrics: basicMetrics, + }, + { + name: "basic metrics of 3 member cluster", + options: []e2e.EPClusterOption{ + e2e.WithClusterSize(3), + }, + expectedMetrics: append(basicMetrics, extraMultipleMemberClusterMetrics...), + }, + { + name: "extensive metrics of 1 member cluster", + options: []e2e.EPClusterOption{ + e2e.WithClusterSize(1), + e2e.WithExtensiveMetrics(), + }, + expectedMetrics: append(basicMetrics, extraExtensiveMetrics...), + }, + { + name: "extensive metrics of 3 member cluster", + options: []e2e.EPClusterOption{ + e2e.WithClusterSize(3), + e2e.WithExtensiveMetrics(), + }, + expectedMetrics: append(append(basicMetrics, extraExtensiveMetrics...), extraMultipleMemberClusterMetrics...), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + e2e.BeforeTest(t) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + epc, err := e2e.NewEtcdProcessCluster(ctx, t, tc.options...) + require.NoError(t, err) + defer epc.Close() + + c := epc.Procs[0].Etcdctl() + for i := 0; i < 3; i++ { + err = c.Put(ctx, fmt.Sprintf("key_%d", i), fmt.Sprintf("value_%d", i), config.PutOptions{}) + require.NoError(t, err) + } + _, err = c.Get(ctx, "k", config.GetOptions{}) + require.NoError(t, err) + + metricsURL, err := url.JoinPath(epc.Procs[0].Config().ClientURL, "metrics") + require.NoError(t, err) + + mfs, err := getMetrics(metricsURL) + require.NoError(t, err) + + var missingMetrics []string + for _, metrics := range tc.expectedMetrics { + if _, ok := mfs[metrics]; !ok { + missingMetrics = append(missingMetrics, metrics) + } + } + require.Emptyf(t, missingMetrics, "Some metrics are missing: %v", missingMetrics) + + // Please keep the log below to generate the expected metrics. + // t.Logf("All metrics: %v", formatMetrics(slices.Sorted(maps.Keys(mfs)))) + }) + } +} + +func getMetrics(metricsURL string) (map[string]*dto.MetricFamily, error) { + httpClient := http.Client{Transport: &http.Transport{}} + resp, err := httpClient.Get(metricsURL) + if err != nil { + return nil, err + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + var parser expfmt.TextParser + return parser.TextToMetricFamilies(bytes.NewReader(data)) +} + +// formatMetrics is only for test purpose +/*func formatMetrics(metrics []string) string { + quoted := make([]string, len(metrics)) + for i, s := range metrics { + quoted[i] = fmt.Sprintf(`"%s",`, s) + } + + return fmt.Sprintf("[]string{\n%s\n}", strings.Join(quoted, "\n")) +}*/ diff --git a/tests/framework/e2e/cluster.go b/tests/framework/e2e/cluster.go index 3a2f8388835..282e9614901 100644 --- a/tests/framework/e2e/cluster.go +++ b/tests/framework/e2e/cluster.go @@ -416,6 +416,10 @@ func WithCipherSuites(suites []string) EPClusterOption { return func(c *EtcdProcessClusterConfig) { c.ServerConfig.CipherSuites = suites } } +func WithExtensiveMetrics() EPClusterOption { + return func(c *EtcdProcessClusterConfig) { c.ServerConfig.Metrics = "extensive" } +} + // NewEtcdProcessCluster launches a new cluster from etcd processes, returning // a new EtcdProcessCluster once all nodes are ready to accept client requests. func NewEtcdProcessCluster(ctx context.Context, t testing.TB, opts ...EPClusterOption) (*EtcdProcessCluster, error) { diff --git a/tests/go.mod b/tests/go.mod index 646ce8dca74..2daab290525 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -24,6 +24,7 @@ require ( github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.0 github.com/prometheus/client_golang v1.20.5 + github.com/prometheus/client_model v0.6.1 github.com/prometheus/common v0.62.0 github.com/soheilhy/cmux v0.1.5 github.com/stretchr/testify v1.10.0 @@ -79,7 +80,6 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/sirupsen/logrus v1.9.3 // indirect