Skip to content

Commit

Permalink
Merge branch 'OVIS-4.4.2' into OVIS-4.4.2
Browse files Browse the repository at this point in the history
  • Loading branch information
tom95858 authored Feb 14, 2024
2 parents aefe68d + 7797d3d commit a42c5fd
Show file tree
Hide file tree
Showing 26 changed files with 337 additions and 98 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/4.3.3-compat-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ on:
push:
branches: [ OVIS-4 ]
pull_request:
branches: [ OVIS-4 ]
branches: [ 'OVIS-4**' ]

defaults:
run:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-ddebug-centos7.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [ OVIS-4 ]
pull_request:
branches: [ OVIS-4 ]
branches: [ 'OVIS-4**' ]

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-test-centos7.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [ OVIS-4 ]
pull_request:
branches: [ OVIS-4 ]
branches: [ 'OVIS-4**' ]

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-test-ubuntu-22.04.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [ OVIS-4 ]
pull_request:
branches: [ OVIS-4 ]
branches: [ 'OVIS-4**' ]

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-test-ubuntu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [ OVIS-4 ]
pull_request:
branches: [ OVIS-4 ]
branches: [ 'OVIS-4**' ]

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ldms-test-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [ OVIS-4 ]
pull_request:
branches: [ OVIS-4 ]
branches: [ 'OVIS-4**' ]

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/space-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Space Check

on:
pull_request:
branches: [ OVIS-4 ]
branches: [ 'OVIS-4**' ]

jobs:
space_check:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-make-dist.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [ OVIS-4 ]
pull_request:
branches: [ OVIS-4 ]
branches: [ 'OVIS-4**' ]

jobs:
build:
Expand Down
6 changes: 4 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,8 @@ AC_ARG_ENABLE([ibnet],
[],
[enable_ibnet="check"])
AM_CONDITIONAL([ENABLE_IBNET], [test "x$enable_ibnet" != xno])
AS_IF([test "x$enable_ibnet" = xyes],[
AS_IF([test "$enable_ibnet" = xyes],[
AC_MSG_NOTICE([Disable ibnet module NOT requested])
AS_IF([test "x$HAVE_LIBIBMAD" = xno],
[AC_MSG_ERROR([ibnet required libibmad or <infiniband/mad.h> not found])])
AS_IF([test "x$HAVE_LIBIBUMAD" = xno],
Expand Down Expand Up @@ -914,14 +915,15 @@ AM_CONDITIONAL([SYSCONFDIR_NOT_ETC], [test "${sysconfdir}" != "/etc"])
AC_LIB_HAVE_LINKFLAGS([cxi], [], [
#include <stddef.h> /* libcxi.h fails to include this */
#include <libcxi/libcxi.h>
#include <cassini_cntr_desc.h> /* needed at least starting with shs-2.1.0 */
])
AM_CONDITIONAL([HAVE_LIBCXI], [test "x$HAVE_LIBCXI" = xyes])

dnl --enable-slingshot option. When the option is not given on the configure
dnl command line, enable_slingshot is set to "check"; the tests that follow
dnl compare it against "no" and "yes".
dnl The help text previously misspelled "slingshot" as "slinghost".
AC_ARG_ENABLE([slingshot],
[AS_HELP_STRING([--enable-slingshot], [require the slingshot related plugins @<:@default=check@:>@])],
[],
[enable_slingshot="check"])
AM_CONDITIONAL([ENABLE_SLINGSHOT], [test "x$enable_slingshot" != xno])
AM_CONDITIONAL([ENABLE_SLINGSHOT], [test "x$enable_slingshot" != xno -a "x$HAVE_LIBCXI" = xyes])
AS_IF([test "x$enable_slingshot" = xyes],[
AS_IF([test "x$HAVE_LIBCXI" = xno],
[AC_MSG_ERROR([libcxi or its headers not found])])
Expand Down
15 changes: 15 additions & 0 deletions ldms/scripts/examples/dcgm1
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Example test input for the dcgm sampler, run with two daemons (1 and 2).
# JOBDATA, LDMSD, MESSAGE, LDMS_LS, SLEEP, KILL_LDMSD, vgoff and file_created
# are not defined in this file; they come from whatever driver reads it.
# NOTE(review): presumably the ldms static test harness - confirm.
export plugname=dcgm
# NOTE(review): looks like the base from which per-daemon ports are derived - confirm.
portbase=61073
# Valgrind options. NOTE(review): presumably only used when valgrind is switched on - confirm.
VGARGS="--leak-check=full --track-origins=yes --trace-children=yes"
# Job data file for daemons 1 and 2.
JOBDATA $TESTDIR/job.data 1 2
# NOTE(review): vgoff appears both before and after LDMSD; if the daemons were
# meant to run under valgrind the first one would need to turn it on - confirm.
vgoff
# Daemons 1 and 2; the per-daemon settings are in the files dcgm1.1 and dcgm1.2.
LDMSD -p prolog.jobid 1 2
vgoff
MESSAGE ldms_ls on host 1:
LDMS_LS 1 -lv
SLEEP 1
MESSAGE ldms_ls on host 2:
LDMS_LS 2 -l
# Pause before shutting the daemons down.
# NOTE(review): presumably long enough for the store on daemon 2 to write data - confirm.
SLEEP 5
KILL_LDMSD 1 2
# Check for the store output; the path matches container=node and the
# schema name set to the test name in dcgm1.2.
file_created $STOREDIR/node/$testname
3 changes: 3 additions & 0 deletions ldms/scripts/examples/dcgm1.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Load, configure and start the dcgm_sampler plugin.
# NOTE(review): presumably this file is the configuration for daemon 1 of the dcgm1 test - confirm.
load name=dcgm_sampler
# The interval on the config line is the same value as on the start line below (1000000).
# use_base=1 is given together with the producer, instance, component_id, perm, uid and gid options.
# NOTE(review): the job_set attribute value itself begins with instance= (two equals signs
# in one attribute); confirm that this is intended and not a plain set name.
config name=dcgm_sampler producer=localhost${i} schema=${testname} instance=localhost${i}/${testname} component_id=${i} interval=1000000 perm=757 uid=3556 gid=3556 job_set=instance=localhost${i}/job_info use_base=1
start name=dcgm_sampler interval=1000000 offset=0
15 changes: 15 additions & 0 deletions ldms/scripts/examples/dcgm1.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# cannot load sampler instance on same node.
# NOTE(review): presumably this file is the configuration for daemon 2 of the dcgm1 test,
# which collects from daemon 1 and stores the data instead of sampling - confirm.

# Load the store_csv plugin and configure it with the store directory as its path.
load name=store_csv
config name=store_csv path=${STOREDIR} altheader=0

# Add and start a producer named localhost1 (type active, using the port1 variable).
prdcr_add name=localhost1 host=${HOST} type=active xprt=${XPRT} port=${port1} interval=2000000
prdcr_start name=localhost1

# Add an updater named allhosts, attach every producer matching the regex, and start it.
updtr_add name=allhosts interval=1000000 offset=100000
updtr_prdcr_add name=allhosts regex=.*
updtr_start name=allhosts

# Add a storage policy that uses store_csv for the schema named after the test,
# in container node; attach every producer matching the regex, and start it.
strgp_add name=store_${testname} plugin=store_csv schema=${testname} container=node
strgp_prdcr_add name=store_${testname} regex=.*
strgp_start name=store_${testname}
3 changes: 1 addition & 2 deletions ldms/src/core/ldms.c
Original file line number Diff line number Diff line change
Expand Up @@ -855,8 +855,7 @@ static void __destroy_set_no_lock(void *v)
__ldms_set_info_delete(&set->local_info);
__ldms_set_info_delete(&set->remote_info);
zap_unmap(set->lmap);
if (set->rmap)
zap_unmap(set->rmap);
zap_unmap(set->rmap);
free(set);
}

Expand Down
13 changes: 11 additions & 2 deletions ldms/src/core/ldms_xprt.c
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,11 @@ static void process_set_delete_request(struct ldms_xprt *x, struct ldms_request
set = __ldms_find_local_set(req->set_delete.inst_name);
__ldms_set_tree_unlock();
if (set) {
pthread_mutex_lock(&set->lock);
zap_unmap(set->lmap);
zap_unmap(set->rmap);
set->lmap = set->rmap = NULL;
pthread_mutex_unlock(&set->lock);
if (set->xprt != x) {
assert(set->xprt != x);
goto reply_1;
Expand Down Expand Up @@ -1553,8 +1558,12 @@ int __ldms_remote_update(ldms_t x, ldms_set_t s, ldms_update_cb_t cb, void *arg)
if (!ldms_xprt_connected(x))
return ENOTCONN;

if (!s->lmap || !s->rmap)
pthread_mutex_lock(&s->lock);
if (!s->lmap || !s->rmap) {
pthread_mutex_unlock(&s->lock);
return EINVAL;
}
pthread_mutex_unlock(&s->lock);

if (LDMS_XPRT_AUTH_GUARD(x))
return EPERM;
Expand Down Expand Up @@ -2551,8 +2560,8 @@ static void handle_rendezvous_lookup(zap_ep_t zep, zap_event_t ev,

if (lset) {
rc = EEXIST;
lset = NULL; /* So error path won't try to delete it */
ref_put(&lset->ref, "__ldms_find_local_set");
lset = NULL; /* So error path won't try to delete it */
/* unmap ev->map, it is not used */
zap_unmap(ev->map);
goto callback;
Expand Down
23 changes: 2 additions & 21 deletions ldms/src/decomp/as_is/decomp_as_is.c
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,8 @@ __get_row_cfg(__decomp_as_is_cfg_t dcfg, ldms_set_t set)
return drow;

err:
if (name)
free(name);
if (drow->cols) {
for (i = 0; i < drow->col_count; i++) {
free(drow->cols[i].name);
Expand Down Expand Up @@ -633,37 +635,16 @@ static int __decomp_as_is_decompose(ldmsd_strgp_t strgp, ldms_set_t set,
TAILQ_HEAD(, _list_entry) list_cols;
int row_more_le;
struct ldms_timestamp ts;
const char *set_schema;
int row_schema_name_len;
ldms_mval_t phony;
char *row_schema_name = NULL;

if (!TAILQ_EMPTY(row_list))
return EINVAL;

ts = ldms_transaction_timestamp_get(set);

set_schema = ldms_set_schema_name_get(set);

TAILQ_INIT(&list_cols);
ldms_digest = ldms_set_digest_get(set);

/*
* NOTE Create rows from the set as-is, with list entry expansion.
*
* The schema format is "<schema_name>_<short_sha>", where the
* "<short_sha>" is the first 7 characters of the hex string
* representation of the SHA (similar to git short commit ID).
*
*/
row_schema_name_len = asprintf(&row_schema_name, "%s_%02hhx%02hhx%02hhx%hhx", set_schema,
ldms_digest->digest[0],
ldms_digest->digest[1],
ldms_digest->digest[2],
(unsigned char)(ldms_digest->digest[3] >> 4));
if (row_schema_name_len < 0)
return errno;

drow = __get_row_cfg(dcfg, set);
if (!drow)
return errno;
Expand Down
2 changes: 1 addition & 1 deletion ldms/src/ldmsd/ldmsd.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ int log_actor(ev_worker_t src, ev_worker_t dst, ev_status_t status, ev_t ev)
rc = __logrotate();
} else {
rc = __log(level, msg, tv, tm);
if (0 == ev_pending(logger_w))
if (0 == ev_pending(logger_w) && log_fp != LDMSD_LOG_SYSLOG)
fflush(log_fp);
free(msg);
}
Expand Down
32 changes: 25 additions & 7 deletions ldms/src/ldmsd/ldmsd_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -329,23 +329,41 @@ int ldmsd_config_plugin(char *plugin_name,

avl = calloc(1, sizeof(*avl));
kwl = calloc(1, sizeof(*kwl));
if (!avl || !kwl)
return ENOMEM;
if (!avl || !kwl) {
rc = ENOMEM;
goto err;
}
avl->av_list = av_copy(_av_list);
kwl->av_list = av_copy(_kw_list);
if (!avl->av_list || !kwl->av_list)
return ENOMEM;
if (!avl->av_list || !kwl->av_list) {
rc = ENOMEM;
goto err;
}

pi = ldmsd_get_plugin(plugin_name);
if (!pi)
return ENOENT;
if (!pi) {
rc = ENOENT;
goto err;
}

pthread_mutex_lock(&pi->lock);
rc = pi->plugin->config(pi->plugin, _kw_list, _av_list);
TAILQ_INSERT_TAIL(&pi->plugin->kwl_q, kwl, entry);
TAILQ_INSERT_TAIL(&pi->plugin->avl_q, avl, entry);
pthread_mutex_unlock(&pi->lock);
return rc;
err:
if (avl) {
if (avl->av_list)
av_free(avl->av_list);
free(avl);
}
if (kwl) {
if (kwl->av_list)
av_free(kwl->av_list);
free(kwl);
}
return rc;
}

int _ldmsd_set_udata(ldms_set_t set, char *metric_name, uint64_t udata,
Expand Down Expand Up @@ -1159,7 +1177,7 @@ int ldmsd_plugins_usage(const char *plugname)
char *libpath;
char *saveptr = NULL;

if (0 == strcmp(plugname, "all"))
if (plugname && 0 == strcmp(plugname, "all"))
plugname = NULL;

char *path = getenv("LDMSD_PLUGIN_LIBPATH");
Expand Down
1 change: 1 addition & 0 deletions ldms/src/ldmsd/ldmsd_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -2606,6 +2606,7 @@ static int strgp_add_handler(ldmsd_req_ctxt_t reqc)
free(container);
free(schema);
free(perm_s);
free(decomp);
return 0;
}

Expand Down
2 changes: 2 additions & 0 deletions ldms/src/sampler/dcgm_sampler/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
libdcgm_sampler_la_SOURCES = \
dcgm_sampler.c
libdcgm_sampler_la_LIBADD = \
$(top_builddir)/ldms/src/sampler/libsampler_base.la \
$(top_builddir)/ldms/src/core/libldms.la \
$(top_builddir)/lib/src/coll/libcoll.la \
$(top_builddir)/lib/src/ovis_util/libovis_util.la \
$(top_builddir)/ldms/src/sampler/libjobid_helper.la \
-ldcgm
libdcgm_sampler_la_LDFLAGS = \
Expand Down
20 changes: 14 additions & 6 deletions ldms/src/sampler/dcgm_sampler/Plugin_dcgm_sampler.man
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Plugin_dcgm_sampler - man page for the LDMS dcgm_sampler plugin
.SH SYNOPSIS
Within ldmsd_controller or a configuration file:
.br
config name=dcgm_sampler [ <attr>=<value> ]
config name=dcgm_sampler [ <attr>=<value> ] [use_base=<*>]

.SH DESCRIPTION
With LDMS (Lightweight Distributed Metric Service), plugins for the ldmsd (ldms daemon) are configured via ldmsd_controller
Expand All @@ -17,7 +17,7 @@ The schema is named "dcgm" by default.

.TP
.BR config
name=<plugin_name> interval=<interval(us)> [fields=<fields>] [schema=<schema_name>] [job_set=<metric set name>]
name=<plugin_name> interval=<interval(us)> [fields=<fields>] [schema=<schema_name>] [job_set=<metric set name>] [use_base=<*> [uid=<int>] [gid=<int>] [perm=<octal>] [instance=<name>] [producer=<name>] [job_id=<metric name in job_set set>]]
.br
configuration line
.RS
Expand All @@ -26,16 +26,24 @@ name=<plugin_name>
.br
This MUST be dcgm_sampler.
.TP
use_base=<*>
.br
Any value given enables the sampler_base configuration option processing (see ldms_sampler_base(7)). If not given, the options not
listed below are ignored.
.TP
interval=<interval(us)>
.br
The sampling interval. This MUST be set to the same value that is
set on the "start" line, otherwise behavior is undetermined.
The DCGM library sampling interval (dcgmWatchFields() updateFreq). This MUST be set to the same value that is
set on the dcgm_sampler start line, otherwise behavior is undetermined.
.TP
fields=<fields>
.br
<fields> is a comma-separated list of integers representing DCGM field
numebers that the plugin should watch. By default the plugin will
watch fields 150,155.
identifiers that the plugin should watch. By default the plugin will
watch fields 150,155. The field identifier meanings are defined in dcgm_fields.h
and the DCGM Library API Reference Manual and may vary with DCGM release version.
The plugin usage message provides a table of fields, subject to hardware
support; see the output of 'ldms-plugins.sh dcgm_sampler'.
.TP
schema=<schema_name>
.br
Expand Down
Loading

0 comments on commit a42c5fd

Please sign in to comment.