Skip to content

Commit

Permalink
add use_base to dcgm sampler
Browse files Browse the repository at this point in the history
  • Loading branch information
baallan authored and tom95858 committed Dec 8, 2023
1 parent 05de014 commit 8ef06ee
Show file tree
Hide file tree
Showing 8 changed files with 338 additions and 70 deletions.
1 change: 1 addition & 0 deletions ldms/scripts/examples/.canned
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ lustre_mdc
dstat.job
many
conf_csv
dcgm1

# not yet tested
rabbitv3
Expand Down
15 changes: 15 additions & 0 deletions ldms/scripts/examples/dcgm1
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Canned test driver for the dcgm sampler example.
# JOBDATA, LDMSD, MESSAGE, LDMS_LS, SLEEP, KILL_LDMSD, vgoff and file_created
# are helpers defined outside this file; notes about them below are
# assumptions to be confirmed against the test harness.
export plugname=dcgm
# presumably the base value from which per-daemon ports are derived — TODO confirm
portbase=61073
# Valgrind options. NOTE(review): vgoff is called before LDMSD below, so these
# look unused in this test — confirm.
VGARGS="--leak-check=full --track-origins=yes --trace-children=yes"
# Job data file for daemons 1 and 2.
JOBDATA $TESTDIR/job.data 1 2
vgoff
# Start daemons 1 and 2 (presumably configured from dcgm1.1 and dcgm1.2 — verify).
LDMSD -p prolog.jobid 1 2
vgoff
# Query each daemon with ldms_ls; daemon 1 with -lv, daemon 2 with -l.
MESSAGE ldms_ls on host 1:
LDMS_LS 1 -lv
SLEEP 1
MESSAGE ldms_ls on host 2:
LDMS_LS 2 -l
# Pause before shutting the daemons down (presumably to let data be stored — confirm).
SLEEP 5
KILL_LDMSD 1 2
# Final check: a file named after the test must exist under the "node" directory of the store.
file_created $STOREDIR/node/$testname
3 changes: 3 additions & 0 deletions ldms/scripts/examples/dcgm1.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Sampler configuration: load dcgm_sampler, configure it with use_base=1 so the
# base options (producer, instance, component_id, perm, uid, gid, job_set) are
# passed along with it, then start it.
# ${i} and ${testname} are presumably expanded by the test harness — confirm.
load name=dcgm_sampler
# interval here and on the start line are both 1000000; the plugin man page
# requires the two values to match.
config name=dcgm_sampler producer=localhost${i} schema=${testname} instance=localhost${i}/${testname} component_id=${i} interval=1000000 perm=757 uid=3556 gid=3556 job_set=instance=localhost${i}/job_info use_base=1
start name=dcgm_sampler interval=1000000 offset=0
15 changes: 15 additions & 0 deletions ldms/scripts/examples/dcgm1.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Aggregator/store configuration: collects sets from producer localhost1 and
# writes the ${testname} schema with store_csv.
# cannot load sampler instance on same node.

# CSV store writing under ${STOREDIR}.
load name=store_csv
config name=store_csv path=${STOREDIR} altheader=0

# Active producer connection to ${HOST}:${port1} over ${XPRT}.
prdcr_add name=localhost1 host=${HOST} type=active xprt=${XPRT} port=${port1} interval=2000000
prdcr_start name=localhost1

# Updater applied to every producer (regex=.*).
updtr_add name=allhosts interval=1000000 offset=100000
updtr_prdcr_add name=allhosts regex=.*
updtr_start name=allhosts

# Storage policy: schema ${testname} from any producer goes to store_csv, container "node".
strgp_add name=store_${testname} plugin=store_csv schema=${testname} container=node
strgp_prdcr_add name=store_${testname} regex=.*
strgp_start name=store_${testname}
25 changes: 21 additions & 4 deletions ldms/src/sampler/dcgm_sampler/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
# Automake declarations for the dcgm_sampler plugin library and the
# ldms-dcgm-list-fields helper program.
# NOTE(review): several lines below appear twice (the dcgm_sampler.c source,
# the libjobid_helper.la entry, and the -export-symbols-regex/-version-info
# pair). This looks like old and new diff lines rendered together — confirm
# against the real Makefile.am before relying on this text.
bin_PROGRAMS = ldms-dcgm-list-fields

# Plugin source.
libdcgm_sampler_la_SOURCES = \
dcgm_sampler.c
dcgm_sampler.c

# Libraries the plugin links against: in-tree LDMS libraries plus the DCGM library.
libdcgm_sampler_la_LIBADD = \
$(top_builddir)/ldms/src/sampler/libsampler_base.la \
$(top_builddir)/ldms/src/core/libldms.la \
$(top_builddir)/lib/src/coll/libcoll.la \
$(top_builddir)/ldms/src/sampler/libjobid_helper.la \
$(top_builddir)/ldms/src/sampler/libjobid_helper.la \
-ldcgm

# Libtool link flags: export only symbols matching 'get_plugin'; library version 1:0:0.
libdcgm_sampler_la_LDFLAGS = \
-no-undefined \
-export-symbols-regex 'get_plugin' \
-version-info 1:0:0
-export-symbols-regex 'get_plugin' \
-version-info 1:0:0
# Preprocessor flags substituted by configure.
libdcgm_sampler_la_CPPFLAGS = \
@OVIS_INCLUDE_ABS@

# The plugin is installed as a libtool library in pkglibdir.
pkglib_LTLIBRARIES = libdcgm_sampler.la

# Man page, shipped in the distribution and installed in section 7.
dist_man7_MANS = Plugin_dcgm_sampler.man

# Generator script: distributed but not installed.
dist_noinst_SCRIPTS = gen-ldms-dcgm-list-fields

# Generate the C source of ldms-dcgm-list-fields by capturing the stdout of the
# generator script. Output is written to a temporary file and renamed into
# place only on success, so a failed or interrupted run cannot leave behind a
# truncated source file that make would treat as up to date.
ldms-dcgm-list-fields.c: $(srcdir)/gen-ldms-dcgm-list-fields
	$(srcdir)/gen-ldms-dcgm-list-fields > $@.tmp || { $(RM) $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Build settings for the ldms-dcgm-list-fields program (Automake spells the
# hyphens as underscores in the variable prefix). Its only source is the
# generated ldms-dcgm-list-fields.c; it links against the DCGM library.
ldms_dcgm_list_fields_SOURCES = ldms-dcgm-list-fields.c
ldms_dcgm_list_fields_CPPFLAGS = @OVIS_INCLUDE_ABS@
ldms_dcgm_list_fields_LDADD = -ldcgm

# Remove the generated C source on "make clean". The file name must match the
# target of the generation rule, which is spelled with hyphens; the
# underscore spelling is only used for Automake variable prefixes.
clean-local::
	$(RM) $(builddir)/ldms-dcgm-list-fields.c
23 changes: 17 additions & 6 deletions ldms/src/sampler/dcgm_sampler/Plugin_dcgm_sampler.man
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Plugin_dcgm_sampler - man page for the LDMS dcgm_sampler plugin
.SH SYNOPSIS
Within ldmsd_controller or a configuration file:
.br
config name=dcgm_sampler [ <attr>=<value> ]
config name=dcgm_sampler [ <attr>=<value> ] [use_base=1]

.SH DESCRIPTION
With LDMS (Lightweight Distributed Metric Service), plugins for the ldmsd (ldms daemon) are configured via ldmsd_controller
Expand All @@ -17,7 +17,7 @@ The schema is named "dcgm" by default.

.TP
.BR config
name=<plugin_name> interval=<interval(us)> [fields=<fields>] [schema=<schema_name>] [job_set=<metric set name>]
name=<plugin_name> interval=<interval(us)> [fields=<fields>] [schema=<schema_name>] [job_set=<metric set name>] [use_base=1 [uid=<int>] [gid=<int>] [perm=<octal>] [instance=<name>] [producer=<name>] [job_id=<metric name in job_set set>]]
.br
configuration line
.RS
Expand All @@ -26,16 +26,24 @@ name=<plugin_name>
.br
This MUST be dcgm_sampler.
.TP
use_base=1
.br
This enables the sampler_base configuration option processing (see ldms_sampler_base(7)). If not given, the options not
listed below are ignored.
.TP
interval=<interval(us)>
.br
The sampling interval. This MUST be set to the same value that is
set on the "start" line, otherwise behavior is undetermined.
The DCGM library sampling interval (dcgmWatchFields() updateFreq). This MUST be set to the same value that is
set on the dcgm_sampler start line, otherwise behavior is undetermined.
.TP
fields=<fields>
.br
<fields> is a comma-separated list of integers representing DCGM field
numbers that the plugin should watch. By default the plugin will
watch fields 150,155.
identifiers that the plugin should watch. By default the plugin will
watch fields 150,155. The field identifier meanings are defined in dcgm_fields.h
and the DCGM Library API Reference Manual and may vary with DCGM release version.
The ldms-dcgm-list-fields command provides a table of fields, subject to hardware
support.
.TP
schema=<schema_name>
.br
Expand All @@ -59,5 +67,8 @@ config name=dcgm_sampler interval=1000000 fields=150,155,1001,1002,1003 schema=d
start name=dcgm_sampler interval=1000000
.fi

.SH NOTES
Multiple instances of the sampler cannot run on the same server.

.SH SEE ALSO
ldmsd(8), ldms_quickstart(7), ldmsd_controller(8), ldms_sampler_base(7)
Loading

0 comments on commit 8ef06ee

Please sign in to comment.