From 54ac90abe4f0fd71340852e7ad4e89aedf5d4b25 Mon Sep 17 00:00:00 2001
From: Nichamon Naksinehaboon
Date: Tue, 19 Dec 2023 22:41:04 -0600
Subject: [PATCH] [WIP] Report LDMSD's worker thread usages in thread_stats

---
Review notes (text between "---" and the diffstat is not part of the commit):
- ldmsd_controller, second hunk: the new error print referenced `resp`,
  which is not defined in the visible hunk; it now prints `rc` and `msg`.
  NOTE(review): assumes `msg` is still the raw reply at that point, because
  fmt_status(msg) only runs after the rc check -- confirm.
- display_worker_thread_stats: added the missing separator between the
  duration and event-count columns so rows line up with the header.
- ldmsd_worker_thrstat_get is now declared with a (void) prototype.
- The JSON layout comment now lists "ev_cnt", which the code emits; the
  placeholder types follow the printf formats used by __APPEND.
- NOTE(review): __APPEND_ERR can be reached while wres is still NULL;
  confirm ldmsd_worker_thrstat_free() accepts NULL.
- NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy; use `git apply --ignore-whitespace` if the
  context lines do not match the tree.

 ldms/python/ldmsd/ldmsd_controller | 22 ++++++++++++---
 ldms/src/ldmsd/ldmsd_request.c     | 45 ++++++++++++++++++++++++++++--
 2 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/ldms/python/ldmsd/ldmsd_controller b/ldms/python/ldmsd/ldmsd_controller
index 92c1165b5f..6133e0b3dc 100755
--- a/ldms/python/ldmsd/ldmsd_controller
+++ b/ldms/python/ldmsd/ldmsd_controller
@@ -2161,13 +2161,24 @@ class LdmsdCmdParser(cmd.Cmd):
     def complete_set_route(self, text, line, begidx, endidx):
         return self.__complete_attr_list('set_info', text)
 
-    def display_thread_stats(self, stats):
+    def display_worker_thread_stats(self, worker_threads):
+        print(f"LDMSD Worker Thread Statistics")
+        print(f"{'Thread ID':15} {'Linux Thread ID':20} {'Idle':15} {'Active':15} {'Duration (sec)':15} {'Event Counts':15}")
+        print(f"{'-'*15} {'-'*20} {'-'*15} {'-'*15} {'-'*15} {'-'*15}")
+        for e in worker_threads:
+            print(f"{e['tid']:^15} {e['thread_id']:20} {e['idle_pc']:14.2f}% " \
+                  f"{e['active_pc']:14.2f}% {e['total_us']/1000000:15.2f} " \
+                  f"{e['ev_cnt']:15}")
+        print(f"{'-'*15}-{'-'*20}-{'-'*15}-{'-'*15}-{'-'*15}-{'-'*15}")
+
+    def display_io_thread_stats(self, io_threads):
+        print(f"IO Thread Statistics")
         print(f"{'Thread ID':15} {'Linux Thread ID':20} {'Name':16} {'Samples':12} {'Sample Rate':12} " \
               f"{'Utilization':12} {'Send Queue Size':16} " \
               f"{'Num of EPs':12}")
         print(f"{'-'*15} {'-'*20} ---------------- ------------ ------------ ------------ "\
               "---------------- ------------")
-        for e in stats['entries']:
+        for e in io_threads:
             print(f"{e['tid']:^15} {e['thread_id']:20} {e['name']:16} {e['sample_count']:12.0f} " \
                   f"{e['sample_rate']:12.2f} {e['utilization'] * 100:12.2f} " \
                   f"{e['sq_sz']:16} {e['n_eps']:12}")
@@ -2193,11 +2204,14 @@ class LdmsdCmdParser(cmd.Cmd):
         if msg == "":
             return
         if rc != 0:
+            print(f"Error {rc}: {msg}")
             return
         msg = fmt_status(msg)
-        self.display_thread_stats(msg)
+        self.display_worker_thread_stats(msg['worker_threads'])
+        self.display_io_thread_stats(msg['io_threads'])
         print(f"{'='*60}")
-        for thr in msg['entries']:
+        print(f"IO Thread Usage of LDMS Operations")
+        for thr in msg['io_threads']:
             thr['ldms_xprt'] = dict(sorted(thr['ldms_xprt'].items(), key = lambda item: item[1], reverse = True))
             total = sum(v for v in thr['ldms_xprt'].values())
             print(f"{thr['tid']} {thr['thread_id']} {thr['name']}")
diff --git a/ldms/src/ldmsd/ldmsd_request.c b/ldms/src/ldmsd/ldmsd_request.c
index 97ddab199d..d7d918a135 100644
--- a/ldms/src/ldmsd/ldmsd_request.c
+++ b/ldms/src/ldmsd/ldmsd_request.c
@@ -7104,7 +7104,7 @@ static int __store_time_thread_tree(struct rbt *tree)
  * Sends a JSON formatted summary of Zap thread statistics as follows:
  *
  * { "count" : <int>,
- *   "entries" : [
+ *   "io_threads" : [
  *      { "name" : <string>,
  *        "tid" : <int>,
  *        "thread_id" : <string>,
@@ -7121,9 +7121,21 @@ static int __store_time_thread_tree(struct rbt *tree)
  *        }
  *      },
  *      . . .
+ *   ],
+ *   "worker_threads" : [
+ *      { "name" : <string>,
+ *        "tid" : <int>,
+ *        "thread_id" : <string>,
+ *        "total_us" : <int>,
+ *        "idle_pc" : <float>,
+ *        "active_pc" : <float>,
+ *        "ev_cnt" : <int>
+ *      }
  *   ]
  * }
  */
+extern void ldmsd_worker_thrstat_free(struct ldmsd_worker_thrstat_result *res);
+extern struct ldmsd_worker_thrstat_result *ldmsd_worker_thrstat_get(void);
 static char * __thread_stats_as_json(size_t *json_sz)
 {
 	char *buff, *s;
@@ -7136,6 +7148,8 @@ static char * __thread_stats_as_json(size_t *json_sz)
 	struct rbt store_time_tree;
 	struct rbn *rbn;
 	struct store_time_thread *stime_ent;
+	struct ldmsd_worker_thrstat_result *wres = NULL;
+	struct ovis_scheduler_thrstat *wthr;
 
 	s = buff = NULL;
 	(void)clock_gettime(CLOCK_REALTIME, &start);
@@ -7150,6 +7164,10 @@ static char * __thread_stats_as_json(size_t *json_sz)
 	if (!res)
 		goto __APPEND_ERR;
 
+	wres = ldmsd_worker_thrstat_get();
+	if (!wres)
+		goto __APPEND_ERR;
+
 	buff = malloc(sz);
 	if (!buff)
 		goto __APPEND_ERR;
@@ -7157,13 +7175,14 @@ static char * __thread_stats_as_json(size_t *json_sz)
 
 	__APPEND("{");
 	__APPEND(" \"count\": %d,\n", res->count);
-	__APPEND(" \"entries\": [\n");
+	__APPEND(" \"io_threads\": [\n");
 	for (i = 0; i < res->count; i++) {
 		zthr = res->entries[i].zap_res;
 		__APPEND(" {\n");
 		__APPEND(" \"name\": \"%s\",\n", zthr->name);
 		__APPEND(" \"tid\": %d,\n", zthr->tid);
 		__APPEND(" \"thread_id\": \"%p\",\n", (void*)zthr->thread_id);
+		__APPEND(" \"type\": \"io_thread\",\n");
 		__APPEND(" \"sample_count\": %g,\n", zthr->sample_count);
 		__APPEND(" \"sample_rate\": %g,\n", zthr->sample_rate);
 		__APPEND(" \"utilization\": %g,\n", zthr->utilization);
@@ -7201,14 +7220,33 @@ static char * __thread_stats_as_json(size_t *json_sz)
 		else
 			__APPEND(" }\n");
 	}
+	__APPEND(" ],\n"); /* end of io_threads array */
+	__APPEND(" \"worker_threads\": [\n");
+	for (i = 0; i < wres->count; i++) {
+		wthr = wres->entries[i];
+		__APPEND(" {\n");
+		__APPEND(" \"name\": \"%s\",\n", wthr->name);
+		__APPEND(" \"tid\": %d,\n", wthr->tid);
+		__APPEND(" \"thread_id\": \"%p\",\n", (void*)wthr->thread_id);
+		__APPEND(" \"idle_pc\" : %lf,\n", wthr->idle_pc);
+		__APPEND(" \"active_pc\" : %lf,\n", wthr->active_pc);
+		__APPEND(" \"total_us\" : %ld,\n", wthr->dur);
+		__APPEND(" \"ev_cnt\" : %ld\n", wthr->ev_cnt);
+		if (i < wres->count - 1)
+			__APPEND(" },\n");
+		else
+			__APPEND(" }\n");
+	}
+	__APPEND(" ],\n"); /* end of worker threads */
 	(void)clock_gettime(CLOCK_REALTIME, &end);
 	uint64_t compute_time = ldms_timespec_diff_us(&start, &end);
-	__APPEND(" ],\n"); /* end of entries array */
+
 	__APPEND(" \"compute_time\": %ld\n", compute_time);
 	__APPEND("}"); /* end */
 
 	*json_sz = s - buff + 1;
 	ldms_thrstat_result_free(res);
+	ldmsd_worker_thrstat_free(wres);
 	while ((rbn = rbt_min(&store_time_tree))) {
 		rbt_del(&store_time_tree, rbn);
 		stime_ent = container_of(rbn, struct store_time_thread, rbn);
@@ -7217,6 +7255,7 @@ static char * __thread_stats_as_json(size_t *json_sz)
 	return buff;
 __APPEND_ERR:
 	ldms_thrstat_result_free(res);
+	ldmsd_worker_thrstat_free(wres);
 	while ((rbn = rbt_min(&store_time_tree))) {
 		rbt_del(&store_time_tree, rbn);
 		stime_ent = container_of(rbn, struct store_time_thread, rbn);