This repository has been archived by the owner on Oct 11, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f2d0525
Showing
9 changed files
with
576 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Installs the Zabbix agent together with the NVIDIA GPU monitoring scripts
# and UserParameter definitions on a RHEL/CentOS 7 host (run as root).

# Directory the UserParameter file is installed into.
conf_path=/etc/zabbix/zabbix_agentd.d
# Directory the helper scripts are installed into.
scripts_path=/etc/zabbix/scripts
# UserParameter file shipped with this package.
conf_file=userparameter_nvidia-smi.conf

.PHONY: all install install-requirement install-agent-config install-scripts set-config clean

all:
	@echo "usage: make install"

install: install-requirement install-agent-config install-scripts set-config

install-requirement:
	yum clean all
	# rpm -U fails when the release package is already present; skip it in
	# that case so "make install" can be re-run.
	rpm -q zabbix-release > /dev/null 2>&1 || rpm -Uvh \
		https://repo.zabbix.com/zabbix/3.0/rhel/7/x86_64/zabbix-release-3.0-1.el7.noarch.rpm
	yum -y install zabbix-agent python-pip
	pip install --upgrade pip && pip install nvidia-ml-py

install-agent-config:
	install -o root -g root -m 644 ${conf_file} ${conf_path}

install-scripts:
	install -d -o root -g root -m 755 ${scripts_path}
	install -o root -g root -m 755 \
		get_gpus_info.sh nvidia-ml.py set_zabbix_config.sh ${scripts_path}

set-config:
	bash ${scripts_path}/set_zabbix_config.sh

clean:
	# Remove only what this Makefile installed; ${conf_path} may hold other
	# UserParameter files that do not belong to this package.
	rm -f ${conf_path}/${conf_file}
	test ! -d ${scripts_path} || rm -rf ${scripts_path}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
* install | ||
- sudo make install | ||
|
||
* import the zabbix template | ||
- import zbx_nvidia-smi-multi-gpu.xml into zabbix Templates | ||
- create the GPU-Number and GPU-Avg-Utilization Graphs | ||
- select the host and Add Template Nvidia GPUs Performance | ||
|
||
* restart zabbix-agentd | ||
- bash restart.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/bin/bash
# Zabbix low-level discovery of NVIDIA GPUs.
#
# Runs "nvidia-ml.py --summary", which prints one line per GPU in the form
#   GPU <index>: <name> (UUID: <uuid>)
# and prints a discovery document on stdout:
#   {"data":[{"{#GPUINDEX}":"<index>", "{#GPUUUID}":"<uuid>"}, ...]}

result=$(python /etc/zabbix/scripts/nvidia-ml.py --summary)
first=1

# Index is the number after "GPU "; the UUID is whatever sits inside the
# trailing "(UUID: ...)". Anchoring on both ends keeps a ':' inside the GPU
# name from shifting the fields.
pattern='^GPU ([0-9]+): .*\(UUID: ([^)]+)\)[[:space:]]*$'

echo "{"
echo "\"data\":["

while IFS= read -r line
do
    # Skip anything that is not a summary line. This includes the single empty
    # line printf produces when the summary is empty (no GPUs, or the query
    # failed), which would otherwise become an entry with empty values.
    if [[ ! $line =~ $pattern ]]
    then
        continue
    fi
    index=${BASH_REMATCH[1]}
    gpuuuid=${BASH_REMATCH[2]}

    if [ "$first" -ne 1 ]
    then
        echo ,
    fi
    printf '{"{#GPUINDEX}":"%s", "{#GPUUUID}":"%s"}' "$index" "$gpuuuid"
    first=0
done < <(printf '%s\n' "$result")

echo
echo "]"
echo "}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/bin/bash
# Deploy the GPU monitoring package to a remote host over SSH (as root):
# create the staging directory, copy the files from the current directory,
# then run "make install" and restart the agent there.
#
# Usage: sh <this script> host
# Exit status: non-zero on a usage error or when any remote step fails.

# Print a message prefixed with a timestamp.
function log () {
    echo "$(date +"[%Y-%m-%d %H:%M:%S]")" "$@"
}

if [ $# -lt 1 ]; then
    log "Usage: sh $0 host"
    # A missing argument is an error; report it through the exit status.
    exit 1
fi

host=$1
user=root

# Run the three deployment steps, stopping at the first one that fails.
function install_zabbix_agentd () {
    log "mkdir"
    # -p keeps a redeploy from failing when the directory already exists.
    ssh -t "${user}@${host}" "mkdir -p /home/admin/zabbix && chown -R admin:admin /home/admin/zabbix" || return 1

    log "copy file"
    scp ./* "${user}@${host}:/home/admin/zabbix" || return 1

    log "exec install"
    ssh -t "${user}@${host}" "cd /home/admin/zabbix && make install && sh restart.sh"
}

install_zabbix_agentd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
#!/usr/local/bin python | ||
# coding=utf-8 | ||
|
||
import optparse, sys, string | ||
from pynvml import * | ||
|
||
class OptionClass:
    """Command-line options of the GPU query script.

    After parse(), the attributes hold the parsed values; an option that was
    not given stays None.
    """

    def __init__(self):
        self.id = None              # GPU index (int) from -i/--id
        self.properties = None      # property name from -p/--properties
        self.number = None          # True when --number was given
        self.summary = None         # True when --summary was given
        self.avgGpuUtil = None      # True when --avg-gpu-util was given
        self.helpProperties = None  # True when --help-properties was given
        self.parser = None          # optparse.OptionParser, set by parse()

    def parse(self):
        """Parse sys.argv and store the recognised options on this object.

        A non-integer --id is reported through the parser's error handler
        (usage message on stderr, exit status 2) instead of a traceback.
        """
        option_list = [
            optparse.make_option("-i", "--id",
                action="store", type="string", dest="id",
                help="Specific GPU unit id"),
            optparse.make_option("-p", "--properties",
                action="store", type="string", dest="properties",
                help="Query GPU properties"),
            optparse.make_option("--number",
                action="store_true", dest="number", help="Number of GPUs"),
            optparse.make_option("--summary",
                action="store_true", dest="summary", help="Summary list GPUs"),
            optparse.make_option("--avg-gpu-util",
                action="store_true", dest="avgGpuUtil", help="Average GPU utilization"),
            optparse.make_option("--help-properties",
                action="store_true", dest="helpProperties", help="Help properties of GPUs"),
        ]

        self.parser = optparse.OptionParser(option_list=option_list)
        (options, args) = self.parser.parse_args()

        if options.id is not None:
            try:
                self.id = int(options.id)
            except ValueError:
                self.parser.error(
                    "option --id: invalid integer value: %r" % options.id)

        # Options that were not given are None in `options`, which is also
        # this object's default, so they can be copied unconditionally.
        self.properties = options.properties
        self.number = options.number
        self.summary = options.summary
        self.avgGpuUtil = options.avgGpuUtil
        self.helpProperties = options.helpProperties

    def printHelpProperties(self):
        """Print the property names accepted by --properties."""
        # Format into one string: print("a", "b") would print a tuple when
        # the script is run by a Python 2 interpreter.
        print("%s %s" % ("--properties=utilization.gpu",
                         "Percent of executing on the GPU"))
        print("%s %s" % ("--properties=memory.used",
                         "Percent of used memory on the GPU"))

    def validate(self):
        """Check that the parsed options form a usable request.

        Exits with status 0 after printing the property help when
        --help-properties was given. Exits with status 1 after printing the
        usage when neither a whole-system query (--number, --summary,
        --avg-gpu-util) nor both --id and --properties were given.
        """
        if self.helpProperties:
            self.printHelpProperties()
            sys.exit(0)

        # Whole-system queries need neither --id nor --properties.
        if self.number or self.summary or self.avgGpuUtil:
            return

        if self.id is None or self.properties is None:
            self.parser.print_help()
            sys.exit(1)
|
||
class NvmlClass:
    """Wrapper around the pynvml calls used by this script.

    NVML is initialised when the object is created and shut down when the
    object is destroyed.
    """

    def __init__(self):
        # Set the flag first so __del__ can tell whether there is anything to
        # shut down when nvmlInit() raises.
        self._initialized = False
        nvmlInit()
        self._initialized = True

    def __del__(self):
        # Only shut down what was successfully initialised.
        if getattr(self, "_initialized", False):
            self._initialized = False
            nvmlShutdown()

    @staticmethod
    def _toText(value):
        """Return `value` as text, decoding it when it is a bytes object.

        NOTE(review): some pynvml releases are reported to return bytes for
        names/UUIDs under Python 3 -- confirm against the installed version.
        Decoding keeps '%s' formatting from rendering b'...'.
        """
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode("utf-8", "replace")
        return value

    def getDeviceNumber(self):
        """Return the device count reported by nvmlDeviceGetCount()."""
        return nvmlDeviceGetCount()

    def getDeviceSummary(self):
        """Return one dict per device with the keys "id", "name" and "uuid"."""
        summaryList = []
        for i in range(nvmlDeviceGetCount()):
            handle = nvmlDeviceGetHandleByIndex(i)
            summaryList.append({
                "id": i,
                "name": self._toText(nvmlDeviceGetName(handle)),
                "uuid": self._toText(nvmlDeviceGetUUID(handle)),
            })
        return summaryList

    def getDeviceUtilizationGPU(self, id):
        """Return the `gpu` utilization rate of device `id` as an int."""
        handle = nvmlDeviceGetHandleByIndex(int(id))
        util = nvmlDeviceGetUtilizationRates(handle)
        return int(util.gpu)

    def getDeviceUtilizationGPUAvg(self):
        """Return the mean `gpu` utilization rate over all devices as an int.

        Returns 0 when no device is present.
        """
        deviceCount = nvmlDeviceGetCount()
        if deviceCount == 0:
            # Nothing to average; avoid dividing by zero.
            return 0

        util_gpu = 0.0
        for i in range(deviceCount):
            handle = nvmlDeviceGetHandleByIndex(i)
            util_gpu += nvmlDeviceGetUtilizationRates(handle).gpu

        return int(util_gpu / deviceCount)

    def getDeviceMemoryUsed(self, id):
        """Return used memory of device `id` as an integer percentage of total.

        Returns 0 when the reported total is zero.
        """
        handle = nvmlDeviceGetHandleByIndex(int(id))
        mem_info = nvmlDeviceGetMemoryInfo(handle)
        total = float(mem_info.total)
        if total <= 0:
            return 0
        return int(float(mem_info.used) / total * 100)
|
||
def main():
    """Parse the command line, run the requested GPU query and print it.

    Each result is printed as "<label>:<value>"; for the single-value queries
    the caller extracts the value as the text after the ':'. Exits with status
    1 when --properties names an unknown property.
    """
    option = OptionClass()
    option.parse()
    option.validate()

    nvml = NvmlClass()
    # The '%' formatting must happen inside the print(...) call: applied to
    # the result of print() it raises TypeError on Python 3.
    if option.number:
        print("GPU number:%d" % nvml.getDeviceNumber())
    elif option.summary:
        for summary in nvml.getDeviceSummary():
            print("GPU %d: %s (UUID: %s)" %
                  (summary['id'], summary['name'], summary['uuid']))
    elif option.avgGpuUtil:
        print("GPU avg util:%d" % nvml.getDeviceUtilizationGPUAvg())
    elif option.properties == "utilization.gpu":
        print("GPU %d util:%d" %
              (option.id, nvml.getDeviceUtilizationGPU(option.id)))
    elif option.properties == "memory.used":
        print("GPU %d mem used:%d" %
              (option.id, nvml.getDeviceMemoryUsed(option.id)))
    else:
        print("Invalid properties: %s" % option.properties)
        option.printHelpProperties()
        sys.exit(1)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash
# Restart the Zabbix agent: stop every running zabbix_agentd process, then
# start a new daemon.

#stop
# Match the exact process name so that unrelated commands which merely mention
# zabbix_agentd on their command line are left alone.
if pgrep -x zabbix_agentd > /dev/null; then
    # Ask for a clean shutdown first and give it up to ten seconds.
    pkill -TERM -x zabbix_agentd
    for _ in 1 2 3 4 5 6 7 8 9 10; do
        pgrep -x zabbix_agentd > /dev/null || break
        sleep 1
    done
    # Force-kill whatever is still alive (no-op when everything has exited).
    pkill -KILL -x zabbix_agentd
fi

#start
/usr/sbin/zabbix_agentd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/bin/bash
# Configure the local Zabbix agent: set AllowRoot, Server and ServerActive in
# the agent configuration and make the monitoring server's name resolvable
# through /etc/hosts. Safe to run more than once.

modify_user=$(whoami)
modify_date=$(date +%Y-%m-%d)
monitor_host=10.88.128.40
monitor_name=monitor.dev.rokid-inc.com
hosts_conf=/etc/hosts
zabbix_agentd_conf=/etc/zabbix/zabbix_agentd.conf

# set_option KEY VALUE
# Set KEY=VALUE in the agent configuration: rewrite every active (uncommented)
# "KEY=" line, or append a new line when there is none. Matching on the
# anchored key, rather than on text captured by grep, keeps the sed
# expression valid when the key is missing or appears on several lines.
set_option () {
    local key=$1
    local value=$2
    if grep -qE "^[[:space:]]*${key}=" "${zabbix_agentd_conf}"; then
        sed -i -E "s|^[[:space:]]*${key}=.*|${key}=${value}|" "${zabbix_agentd_conf}"
    else
        # Leading newline guards against a file without a final newline.
        printf '\n%s=%s\n' "${key}" "${value}" >> "${zabbix_agentd_conf}"
    fi
}

set_option AllowRoot 1
set_option Server "${monitor_name}"
set_option ServerActive "${monitor_name}"

# Add the hosts entry only once so repeated installs do not pile up duplicates.
if ! grep -qF "${monitor_name}" "${hosts_conf}"; then
    echo -e "\n" >> "${hosts_conf}"
    echo "#add by ${modify_user} ${modify_date}" >> "${hosts_conf}"
    echo "${monitor_host} ${monitor_name}" >> "${hosts_conf}"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
UserParameter=gpu.number,python /etc/zabbix/scripts/nvidia-ml.py --number | cut -d ":" -f2 | ||
UserParameter=gpu.avgutilization,python /etc/zabbix/scripts/nvidia-ml.py --avg-gpu-util | cut -d ":" -f2 | ||
UserParameter=gpu.discovery,/etc/zabbix/scripts/get_gpus_info.sh | ||
UserParameter=gpu.utilization[*],python /etc/zabbix/scripts/nvidia-ml.py -i $1 -p utilization.gpu | cut -d ":" -f2 | ||
UserParameter=gpu.memoryused[*],python /etc/zabbix/scripts/nvidia-ml.py -i $1 -p memory.used | cut -d ":" -f2 |
Oops, something went wrong.