Skip to content

Commit

Permalink
[WIP] Report LDMSD's worker thread usages in thread_stats
Browse files Browse the repository at this point in the history
  • Loading branch information
nichamon committed Dec 20, 2023
1 parent 259cde0 commit c0a5b24
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 7 deletions.
25 changes: 21 additions & 4 deletions ldms/python/ldmsd/ldmsd_controller
Original file line number Diff line number Diff line change
Expand Up @@ -2161,13 +2161,25 @@ class LdmsdCmdParser(cmd.Cmd):
def complete_set_route(self, text, line, begidx, endidx):
return self.__complete_attr_list('set_info', text)

def display_thread_stats(self, stats):
def display_worker_thread_stats(self, worker_threads):
    """Print a table of LDMSD worker-thread statistics to stdout.

    worker_threads -- iterable of dicts, one per worker thread. Each dict
                      must provide the keys 'tid', 'thread_id', 'idle_pc',
                      'active_pc', 'total_us' and 'ev_cnt'.

    'total_us' is divided by 1,000,000 and shown under 'Duration (sec)';
    'idle_pc' and 'active_pc' are printed with a trailing '%'.
    """
    print("LDMSD Worker Thread Statistics")
    print('=' * 60)
    print(f"{'Thread ID':15} {'Linux Thread ID':20} {'Idle':15} {'Active':15} {'Duration (sec)':15} {'Event Counts':15}")
    print(f"{'-'*15} {'-'*20} {'-'*15} {'-'*15} {'-'*15} {'-'*15}")
    for e in worker_threads:
        # Every column is separated by one space, including the last two;
        # without it a wide event count fuses with the duration value.
        print(f"{e['tid']:^15} {e['thread_id']:20} {e['idle_pc']:14.2f}% "
              f"{e['active_pc']:14.2f}% {e['total_us']/1000000:15.2f} "
              f"{e['ev_cnt']:15}")

def display_io_thread_stats(self, io_threads):
    """Print a table of IO-thread statistics to stdout.

    io_threads -- iterable of dicts, one per IO thread. Each dict must
                  provide the keys 'tid', 'thread_id', 'name',
                  'sample_count', 'sample_rate', 'utilization', 'sq_sz'
                  and 'n_eps'.

    'utilization' is multiplied by 100 before it is printed.
    """
    print("IO Thread Statistics")
    print('=' * 60)
    print(f"{'Thread ID':15} {'Linux Thread ID':20} {'Name':16} {'Samples':12} {'Sample Rate':12} "
          f"{'Utilization':12} {'Send Queue Size':16} "
          f"{'Num of EPs':12}")
    print(f"{'-'*15} {'-'*20} ---------------- ------------ ------------ ------------ "
          "---------------- ------------")
    # Iterate the list handed in by the caller; this method has no 'stats'
    # argument, so the former "stats['entries']" loop header is dropped.
    for e in io_threads:
        print(f"{e['tid']:^15} {e['thread_id']:20} {e['name']:16} {e['sample_count']:12.0f} "
              f"{e['sample_rate']:12.2f} {e['utilization'] * 100:12.2f} "
              f"{e['sq_sz']:16} {e['n_eps']:12}")
Expand All @@ -2193,11 +2205,16 @@ class LdmsdCmdParser(cmd.Cmd):
if msg == "":
return
if rc != 0:
print(f"Error {msg['errcode']}: {resp['msg']}")
return
msg = fmt_status(msg)
self.display_thread_stats(msg)
self.display_worker_thread_stats(msg['worker_threads'])
print(f"{'='*60}")
self.display_io_thread_stats(msg['io_threads'])
print(f"{'='*60}")
print(f"IO Thread Usage of LDMS Operations")
print(f"{'='*60}")
for thr in msg['entries']:
for thr in msg['io_threads']:
thr['ldms_xprt'] = dict(sorted(thr['ldms_xprt'].items(), key = lambda item: item[1], reverse = True))
total = sum(v for v in thr['ldms_xprt'].values())
print(f"{thr['tid']} {thr['thread_id']} {thr['name']}")
Expand Down
44 changes: 41 additions & 3 deletions ldms/src/ldmsd/ldmsd_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -7104,7 +7104,7 @@ static int __store_time_thread_tree(struct rbt *tree)
* Sends a JSON formatted summary of Zap IO thread and LDMSD worker thread statistics as follows:
*
* { "count" : <int>,
* "entries" : [
* "io_threads" : [
* { "name" : <string>,
* "tid" : <tid>,
* "thread_id" : <Linux Thread ID>,
Expand All @@ -7121,9 +7121,20 @@ static int __store_time_thread_tree(struct rbt *tree)
* }
* },
* . . .
* ],
* "worker_threads" : [
* { "name" : <string>,
* "tid" : <tid>,
* "thread_id" : <Linux Thread ID>,
* "total_us" : <Total time in micro-seconds>,
* "idle_pc" : <percentage of idle time>,
* "active_pc" : <percentage of active time>,
* "ev_cnt" : <event count>
* }
* ]
* }
*/
/*
 * Worker-thread statistics accessors; both are defined outside this file.
 *
 * ldmsd_worker_thrstat_get() takes no arguments and returns a result object;
 * the caller in this file treats a NULL return as an error.  The result is
 * handed back to ldmsd_worker_thrstat_free() when the caller is done with it.
 *
 * The tag is forward-declared so that both declarations refer to the same
 * file-scope struct type even if no header has introduced it yet.
 */
struct ldmsd_worker_thrstat_result;
extern void ldmsd_worker_thrstat_free(struct ldmsd_worker_thrstat_result *res);
extern struct ldmsd_worker_thrstat_result *ldmsd_worker_thrstat_get(void);
static char * __thread_stats_as_json(size_t *json_sz)
{
char *buff, *s;
Expand All @@ -7136,6 +7147,8 @@ static char * __thread_stats_as_json(size_t *json_sz)
struct rbt store_time_tree;
struct rbn *rbn;
struct store_time_thread *stime_ent;
struct ldmsd_worker_thrstat_result *wres = NULL;
struct ovis_scheduler_thrstat *wthr;
s = buff = NULL;

(void)clock_gettime(CLOCK_REALTIME, &start);
Expand All @@ -7150,20 +7163,25 @@ static char * __thread_stats_as_json(size_t *json_sz)
if (!res)
goto __APPEND_ERR;

wres = ldmsd_worker_thrstat_get();
if (!wres)
goto __APPEND_ERR;

buff = malloc(sz);
if (!buff)
goto __APPEND_ERR;
s = buff;

__APPEND("{");
__APPEND(" \"count\": %d,\n", res->count);
__APPEND(" \"entries\": [\n");
__APPEND(" \"io_threads\": [\n");
for (i = 0; i < res->count; i++) {
zthr = res->entries[i].zap_res;
__APPEND(" {\n");
__APPEND(" \"name\": \"%s\",\n", zthr->name);
__APPEND(" \"tid\": %d,\n", zthr->tid);
__APPEND(" \"thread_id\": \"%p\",\n", (void*)zthr->thread_id);
__APPEND(" \"type\": \"io_thread\",\n");
__APPEND(" \"sample_count\": %g,\n", zthr->sample_count);
__APPEND(" \"sample_rate\": %g,\n", zthr->sample_rate);
__APPEND(" \"utilization\": %g,\n", zthr->utilization);
Expand Down Expand Up @@ -7201,14 +7219,33 @@ static char * __thread_stats_as_json(size_t *json_sz)
else
__APPEND(" }\n");
}
__APPEND(" ],\n"); /* end of entries array */
__APPEND(" \"worker_threads\": [\n");
for (i = 0; i < wres->count; i++) {
wthr = wres->entries[i];
__APPEND(" {\n");
__APPEND(" \"name\": \"%s\",\n", wthr->name);
__APPEND(" \"tid\": %d,\n", wthr->tid);
__APPEND(" \"thread_id\": \"%p\",\n", (void*)wthr->thread_id);
__APPEND(" \"idle_pc\" : %lf,\n", wthr->idle_pc);
__APPEND(" \"active_pc\" : %lf,\n", wthr->active_pc);
__APPEND(" \"total_us\" : %ld,\n", wthr->dur);
__APPEND(" \"ev_cnt\" : %ld\n", wthr->ev_cnt);
if (i < wres->count - 1)
__APPEND(" },\n");
else
__APPEND(" }\n");
}
__APPEND(" ],\n"); /* end of worker threads */
(void)clock_gettime(CLOCK_REALTIME, &end);
uint64_t compute_time = ldms_timespec_diff_us(&start, &end);
__APPEND(" ],\n"); /* end of entries array */

__APPEND(" \"compute_time\": %ld\n", compute_time);
__APPEND("}"); /* end */

*json_sz = s - buff + 1;
ldms_thrstat_result_free(res);
ldmsd_worker_thrstat_free(wres);
while ((rbn = rbt_min(&store_time_tree))) {
rbt_del(&store_time_tree, rbn);
stime_ent = container_of(rbn, struct store_time_thread, rbn);
Expand All @@ -7217,6 +7254,7 @@ static char * __thread_stats_as_json(size_t *json_sz)
return buff;
__APPEND_ERR:
ldms_thrstat_result_free(res);
ldmsd_worker_thrstat_free(wres);
while ((rbn = rbt_min(&store_time_tree))) {
rbt_del(&store_time_tree, rbn);
stime_ent = container_of(rbn, struct store_time_thread, rbn);
Expand Down

0 comments on commit c0a5b24

Please sign in to comment.