-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix leaks and races in jobid parsing.
- Loading branch information
Showing 6 changed files with 209 additions and 6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Settings for this test input: the sampler plugin to load, the dstat schema name, and ldmsd logging/memory options. | ||
export plugname=linux_proc_sampler | ||
# dsname is whatever ldms_dstat_schema_name prints for this option set; dstat_schema is an alias used by the ldmsd configs. | ||
export dsname=$(ldms_dstat_schema_name mmalloc=1 io=1 fd=1 stat=1 auto-schema=1) | ||
export dstat_schema=$dsname | ||
export LDMSD_LOG_LEVEL=ERROR | ||
export LDMSD_LOG_TIME_SEC=1 | ||
# NOTE(review): presumably extra ldmsd command-line options picked up by the test harness -- confirm. | ||
export LDMSD_EXTRA="-m 128m" | ||
|
||
# SUSLEEP <duration>: sleep for <duration> by running sleep through runuser as $USER. | ||
# When the variable bypass is "1", print "skipping sleep" and return 0 without sleeping. | ||
function SUSLEEP () { | ||
if test "$bypass" = "1"; then | ||
echo skipping sleep | ||
return 0 | ||
fi | ||
# Progress message without a trailing newline; "done" finishes the line once the sleep returns. | ||
echo -n sleep $1 ... | ||
runuser -u $USER sleep $1 | ||
echo done | ||
} | ||
|
||
# NOTE(review): portbase is not referenced in the visible part of this script; the notifier below uses the literal port 61061. | ||
portbase=61060 | ||
# Write the file named by "env_exclude" in metrics.input: one pattern per line. | ||
# NOTE(review): the last pattern is a lone double quote -- presumably deliberate (skip entries containing a quote); confirm it is not a stray character. | ||
cat << EOF > $LDMSD_RUN/exclude_env | ||
^COLORTERM | ||
^DBU.* | ||
^DESKTOP_SESSION | ||
^DISPLAY | ||
^GDM.* | ||
^GNO.* | ||
^GUESTFISH.* | ||
^XDG.* | ||
^LS_COLORS | ||
^SESSION_MANAGER | ||
^SSH.* | ||
^XAU.* | ||
^BASH_FUNC_m | ||
" | ||
EOF | ||
# Generate the map file referenced by the "syscalls" key below. | ||
ldms-gen-syscall-map > $LDMSD_RUN/syscalls.map | ||
# Write the JSON file that the sampler config passes as cfg_file. The heredoc delimiter is unquoted, | ||
# so ${LDMSD_RUN} is expanded by the shell while "\t" is written literally (backslash followed by t). | ||
cat << EOF > $LDMSD_RUN/metrics.input | ||
{ "stream" : "slurm", | ||
"argv_sep":"\t", | ||
"syscalls": "${LDMSD_RUN}/syscalls.map", | ||
"argv_msg": 1, | ||
"log_send": 1, | ||
"env_msg": 1, | ||
"env_exclude": "${LDMSD_RUN}/exclude_env", | ||
"fd_msg": 1, | ||
"fd_exclude": [ | ||
"^/dev/", | ||
"^/run/", | ||
"^/var/", | ||
"^/etc/", | ||
"^/sys/", | ||
"^/tmp/", | ||
"^/proc/", | ||
"^/proc$", | ||
"^/ram/tmp/", | ||
"^/usr/lib", | ||
"^/usr/share/", | ||
"^/opt/ness", | ||
"^/ram/opt/ness", | ||
"^/ram/var/", | ||
"/.nfs0" | ||
], | ||
"published_pid_dir" : "${LDMSD_RUN}/ldms-netlink-tracked", | ||
"metrics" : [ | ||
"status_real_user", | ||
"status_eff_user", | ||
"status_real_group", | ||
"status_eff_group", | ||
"stat_pid" , | ||
"stat_state", | ||
"stat_rss", | ||
"stat_utime", | ||
"stat_stime", | ||
"io_read_b", | ||
"io_write_b", | ||
"syscall_name" | ||
] | ||
} | ||
EOF | ||
# Start from a clean slate: remove notifier JSON logs left behind by a previous run. | ||
rm -f "$LOGDIR"/json*.log | ||
# Pre-populate the notifier's track directory with entries named 80..100 | ||
# (presumably stale pids for --purge-track-dir to clean up -- confirm). | ||
# Create the directory first so touch cannot fail in a fresh run directory. | ||
mkdir -p "${LDMSD_RUN}/ldms-netlink-tracked" | ||
for pi in $(seq 80 100); do | ||
touch "${LDMSD_RUN}/ldms-netlink-tracked/$pi" | ||
done | ||
# -f: the notifier log may not exist yet (first run), matching the json*.log cleanup above. | ||
/bin/rm -f "$LOGDIR/nl.log" | ||
|
||
# JOBDATA, LDMSD, vgon and vgoff are not defined in this file; presumably provided by the test harness that sources it. | ||
JOBDATA $TESTDIR/job.data 1 | ||
|
||
# Valgrind command prefixes (drd and memcheck variants), each logging under $LOGDIR. | ||
# NOTE(review): neither variable is referenced in the visible part of the script. | ||
drd="valgrind -v --tool=drd --log-file=$LOGDIR/vg.netlink.drd.txt --trace-cond=yes --trace-fork-join=yes" | ||
memcheck="valgrind -v --leak-check=full --track-origins=yes --trace-children=yes --log-file=$LOGDIR/vg.netlink.memcheck.txt --keep-debuginfo=yes --malloc-fill=3b" | ||
#${BUILDDIR}/sbin/ldms-netlink-notifier --port=61061 --auth=none --reconnect=1 -D 30 -r -j $LOGDIR/json.log --exclude-dir-path= --exclude-short-path= --exclude-programs --track-dir=${LDMSD_RUN}/ldms-netlink-tracked --purge-track-dir & | ||
|
||
# Start the netlink notifier in the background. It logs to $LOGDIR/nl.log and $LOGDIR/json.log, | ||
# tracks pids under ${LDMSD_RUN}/ldms-netlink-tracked, and reads the job id from $TESTDIR/job.data.1. | ||
${BUILDDIR}/sbin/ldms-netlink-notifier --port=61061 --auth=none --reconnect=1 -D 30 -r -j $LOGDIR/json.log --exclude-dir-path= --exclude-short-path= --exclude-programs --track-dir=${LDMSD_RUN}/ldms-netlink-tracked -x -e exec,clone,exit -L $LOGDIR/nl.log --heartbeat 1 -v 3 --ProducerName=$(hostname) --purge-track-dir --format 2 --jobid-file=$TESTDIR/job.data.1 & | ||
|
||
# uncomment next one to test duplicate handling | ||
#${BUILDDIR}/sbin/ldms-netlink-notifier --port=61061 --auth=none --reconnect=1 -D 30 -r -j $LOGDIR/json2.log --exclude-dir-path= --exclude-short-path= --exclude-programs & | ||
# Two alternative VGARGS settings: the second assignment overrides the first (drd), | ||
# so the memcheck-style options are the ones left in effect. | ||
VGARGS="--tool=drd --trace-cond=yes --trace-fork-join=yes" | ||
VGARGS="--leak-check=full --track-origins=yes --trace-children=yes --show-leak-kinds=definite --time-stamp=yes --keep-debuginfo=yes --malloc-fill=3b" | ||
# vgon is commented out, so valgrind is presumably not enabled for daemon 1 -- confirm against the harness. | ||
#vgon | ||
# Start daemons 1, 2 and 3; daemon 1 is started with "-p prolog.jobid". | ||
LDMSD -p prolog.jobid 1 | ||
vgoff | ||
LDMSD 2 | ||
LDMSD 3 | ||
vgoff | ||
# Main test sequence: repeatedly call JOBDATA on $TESTDIR/job.data, with sleeps in between, while the daemons run. | ||
# JOBDATA, MESSAGE, LDMS_LS, KILL_LDMSD, file_created and rollover_created are not defined in this file; | ||
# presumably provided by the test harness that sources it. | ||
JOBDATA $TESTDIR/job.data 1 | ||
SUSLEEP 2 | ||
MESSAGE ldms_ls on host 1: | ||
#LDMS_LS 1 -v | ||
MESSAGE ldms_ls on host 2: | ||
JOBDATA $TESTDIR/job.data 1 | ||
SUSLEEP 1 | ||
LDMS_LS 2 -v | ||
JOBDATA $TESTDIR/job.data 1 | ||
SUSLEEP 5 | ||
#MESSAGE stream_client_dump on sampler daemon 1 | ||
#for lc in $(seq 1); do | ||
#ldmsd_controller --auth none --port 61061 --cmd stream_client_dump | ||
# SUSLEEP 1 | ||
#done | ||
JOBDATA $TESTDIR/job.data 1 | ||
SUSLEEP 5 | ||
# Single-iteration loop ($(seq 1) yields just 1); raise the count to repeat the step. | ||
for lc in $(seq 1); do | ||
#LDMS_LS 1 -v | ||
JOBDATA $TESTDIR/job.data 1 | ||
SUSLEEP 2 | ||
done | ||
JOBDATA $TESTDIR/job.data 1 | ||
SUSLEEP 20 | ||
# Stop the daemons in the order 3, 2, 1, then check the expected store outputs under $STOREDIR. | ||
KILL_LDMSD 3 2 1 | ||
file_created $STOREDIR/node/$plugname | ||
file_created $STOREDIR/node/$dsname | ||
rollover_created $STOREDIR/blobs/linux_proc_sampler_argv.DAT | ||
rollover_created $STOREDIR/blobs/linux_proc_sampler_files.DAT | ||
rollover_created $STOREDIR/blobs/linux_proc_sampler_env.DAT | ||
rollover_created $STOREDIR/blobs/slurm.DAT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Sampler daemon configuration: load the plugin named by ${plugname}, configure it from | ||
# ${LDMSD_RUN}/metrics.input (cfg_file), and start sampling. | ||
# NOTE(review): interval/offset are presumably in microseconds (1000000 = 1 s) -- confirm. | ||
load name=${plugname} | ||
config name=${plugname} producer=localhost${i} schema=${plugname} instance=localhost${i}/${plugname} component_id=${i} perm=0644 cfg_file=${LDMSD_RUN}/metrics.input | ||
start name=${plugname} interval=1000000 offset=0 | ||
|
||
# Disabled here: optional dstat sampler. | ||
# load name=dstat | ||
# config name=dstat producer=localhost${i} instance=localhost${i}/${dstat_schema} component_id=${i} mmalloc=1 io=1 fd=1 auto-schema=1 stat=1 perm=777 | ||
# start name=dstat interval=1000000 offset=0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# blobs must be allowed by writer plugin and prdcr_subscribe by daemon | ||
# Writes the slurm stream and the three linux_proc_sampler_* streams into container "blobs" under ${STOREDIR}. | ||
load name=blob_stream_writer plugin=blob_stream_writer | ||
config name=blob_stream_writer path=${STOREDIR} container=blobs stream=slurm stream=linux_proc_sampler_env stream=linux_proc_sampler_argv types=1 stream=linux_proc_sampler_files | ||
|
||
# Run a dstat sampler whose set instance is named after ${dstat_schema}. | ||
load name=dstat | ||
# The option values (mmalloc, io, fd, stat, auto-schema) mirror the ldms_dstat_schema_name call that | ||
# computes ${dstat_schema}; stat must be a plain "1". perm is written in octal with a leading 0, | ||
# like perm=0644 in the linux_proc_sampler configuration. | ||
config name=dstat producer=localhost${i} instance=localhost${i}/${dstat_schema} component_id=${i} mmalloc=1 io=1 fd=1 auto-schema=1 stat=1 perm=0777 | ||
start name=dstat interval=1000000 offset=0 | ||
|
||
# Producer localhost1: active connection to ${HOST}:${port1} over ${XPRT}. | ||
prdcr_add name=localhost1 host=${HOST} type=active xprt=${XPRT} port=${port1} interval=2000000 | ||
# Subscribe on every producer (regex=.*) to each stream the blob writer stores. | ||
prdcr_subscribe regex=.* stream=slurm | ||
prdcr_subscribe regex=.* stream=linux_proc_sampler_argv | ||
prdcr_subscribe regex=.* stream=linux_proc_sampler_env | ||
prdcr_subscribe regex=.* stream=linux_proc_sampler_files | ||
prdcr_start name=localhost1 | ||
|
||
# Updater "allhosts" covers all producers (regex=.*). | ||
updtr_add name=allhosts interval=1000000 offset=100000 | ||
updtr_prdcr_add name=allhosts regex=.* | ||
updtr_start name=allhosts | ||
|
||
# CSV store rooted at ${STOREDIR}. | ||
load name=store_csv | ||
config name=store_csv path=${STOREDIR} altheader=0 | ||
|
||
# Storage policy: store schema linux_proc_sampler from all producers into container "node". | ||
strgp_add name=store_${testname} plugin=store_csv schema=linux_proc_sampler container=node | ||
strgp_prdcr_add name=store_${testname} regex=.* | ||
strgp_start name=store_${testname} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Producer localhost2: active connection to ${HOST}:${port2} over ${XPRT}. | ||
prdcr_add name=localhost2 host=${HOST} type=active xprt=${XPRT} port=${port2} interval=2000000 | ||
prdcr_start name=localhost2 | ||
|
||
# Updater "allhosts" covers all producers (regex=.*). | ||
updtr_add name=allhosts interval=1000000 offset=200000 | ||
updtr_prdcr_add name=allhosts regex=.* | ||
updtr_start name=allhosts | ||
|
||
# CSV store rooted at ${STOREDIR}. | ||
load name=store_csv | ||
config name=store_csv path=${STOREDIR} altheader=0 | ||
|
||
# Storage policy: store the dstat schema (${dstat_schema}) from all producers into container "node". | ||
strgp_add name=store_dstat plugin=store_csv schema=${dstat_schema} container=node | ||
strgp_prdcr_add name=store_dstat regex=.* | ||
strgp_start name=store_dstat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters