From be49bd0a1375795b2a78877702044327b04a2b30 Mon Sep 17 00:00:00 2001 From: Nichamon Naksinehaboon Date: Fri, 15 Dec 2023 16:36:48 -0600 Subject: [PATCH] Add the per-endpoint send queue depths to xprt_stats's results --- ldms/python/ldmsd/ldmsd_controller | 6 ++++++ ldms/src/ldmsd/ldmsd_request.c | 29 +++++++++++++++++++++++++---- lib/src/zap/zap.c | 5 +++++ lib/src/zap/zap.h | 9 +++++++++ 4 files changed, 45 insertions(+), 4 deletions(-) diff --git a/ldms/python/ldmsd/ldmsd_controller b/ldms/python/ldmsd/ldmsd_controller index 19c086e5d4..13a3b39927 100755 --- a/ldms/python/ldmsd/ldmsd_controller +++ b/ldms/python/ldmsd/ldmsd_controller @@ -2318,6 +2318,12 @@ class LdmsdCmdParser(cmd.Cmd): for op_name in op_stats: s = op_stats[op_name] print(f"{op_name:12} {s['count']:12} {s['min_us']:12} {s['mean_us']:12} {s['max_us']:12}") + print(f"{'-'*40}") + print(f"{'Endpoints':20} {'SQ Depth':10}") + print(f"{'-'*20} {'-'*10}") + sorted_ep = dict(sorted(stats['endpoints'].items())) + for k, x in sorted_ep.items(): + print(f"{k:20} {x['sq_sz']:10}") def do_xprt_stats(self, arg): """ diff --git a/ldms/src/ldmsd/ldmsd_request.c b/ldms/src/ldmsd/ldmsd_request.c index 1869d15d4c..c26d5356ff 100644 --- a/ldms/src/ldmsd/ldmsd_request.c +++ b/ldms/src/ldmsd/ldmsd_request.c @@ -6866,6 +6866,8 @@ static char *__xprt_stats_as_json(size_t *json_sz, int reset) char ip_str[32]; char xprt_type[16]; struct ldms_xprt_rate_data rate_data; + int rc, first = 1; + char lhostname[128], lport_no[32], rhostname[128], rport_no[32]; xprt_type[sizeof(xprt_type)-1] = 0; /* NULL-terminate at the end */ @@ -6880,15 +6882,32 @@ static char *__xprt_stats_as_json(size_t *json_sz, int reset) for (op_e = 0; op_e < LDMS_XPRT_OP_COUNT; op_e++) op_sum[op_e].op_min_us = LLONG_MAX; + __APPEND("{"); + __APPEND(" \"endpoints\":{"); + /* Compute summary statistics across all of the transports */ for (x = ldms_xprt_first(); x; x = ldms_xprt_next(x)) { ldms_stats_entry_t op; - - ldms_xprt_stats(x, &xs); - xprt_count += 1; zap_ep_t zep; zep = ldms_xprt_get_zap_ep(x); zap_ep_state_t ep_state = (zep ? zap_ep_state(zep) : ZAP_EP_CLOSE); + + if (x->zap_ep && (ZAP_EP_CONNECTED == zap_ep_state(x->zap_ep))) { + rc = ldms_xprt_names(x, lhostname, sizeof(lhostname), + lport_no, sizeof(lport_no), + rhostname, sizeof(rhostname), + rport_no, sizeof(rport_no), + NI_NAMEREQD | NI_NUMERICSERV); + + __APPEND(" %s\"%s:%s\":{", ((!first)?",":""), rhostname, rport_no); + __APPEND(" \"sq_sz\":%ld", zap_ep_sq_sz(zep)); + __APPEND(" }"); + first = 0; + } + + ldms_xprt_stats(x, &xs); + xprt_count += 1; + switch (ep_state) { case ZAP_EP_LISTENING: xprt_listen_count += 1; @@ -6923,6 +6942,9 @@ static char *__xprt_stats_as_json(size_t *json_sz, int reset) } ldms_xprt_put(x); } + + __APPEND("},"); + for (op_e = 0; op_e < LDMS_XPRT_OP_COUNT; op_e++) { if (op_sum[op_e].op_count) { op_sum[op_e].op_mean_us = @@ -6933,7 +6955,6 @@ static char *__xprt_stats_as_json(size_t *json_sz, int reset) (void)clock_gettime(CLOCK_REALTIME, &end); uint64_t compute_time = ldms_timespec_diff_us(&start, &end); - __APPEND("{"); __APPEND(" \"compute_time_us\": %ld,\n", compute_time); __APPEND(" \"connect_rate_s\": %f,\n", rate_data.connect_rate_s); __APPEND(" \"connect_request_rate_s\": %f,\n", rate_data.connect_request_rate_s); diff --git a/lib/src/zap/zap.c b/lib/src/zap/zap.c index f2f3ea938c..ec7ba54532 100644 --- a/lib/src/zap/zap.c +++ b/lib/src/zap/zap.c @@ -1156,6 +1156,11 @@ pid_t zap_ep_thread_id(zap_ep_t ep) return ep->thread?ep->thread->stat->tid:-1; } +uint64_t zap_ep_sq_sz(zap_ep_t ep) +{ + return ep->sq_sz; +} + static int zap_initialized = 0; static void zap_atfork() diff --git a/lib/src/zap/zap.h b/lib/src/zap/zap.h index 3c1dde3690..08bed5e528 100644 --- a/lib/src/zap/zap.h +++ b/lib/src/zap/zap.h @@ -936,4 +936,13 @@ pthread_t zap_ep_thread(zap_ep_t ep); */ pid_t zap_ep_thread_id(zap_ep_t ep); +/** + * Get the send queue depth of an endpoint + * + * \param ep A Zap endpoint handle + * + * \return The send queue depth + */ +uint64_t zap_ep_sq_sz(zap_ep_t ep); + #endif