From 2dc1000e641e050a0ffb070e94e4cc25a26f7702 Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Tue, 11 Aug 2020 11:25:14 +0100 Subject: [PATCH 01/11] harness of stat_pusher --- stat-pusher/requirements.txt | 1 + stat-pusher/run_stat_pusher.sh | 16 ++++++++++++++++ stat-pusher/update_pushgateway_stats.py | 15 +++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 stat-pusher/requirements.txt create mode 100755 stat-pusher/run_stat_pusher.sh create mode 100755 stat-pusher/update_pushgateway_stats.py diff --git a/stat-pusher/requirements.txt b/stat-pusher/requirements.txt new file mode 100644 index 0000000..a0753df --- /dev/null +++ b/stat-pusher/requirements.txt @@ -0,0 +1 @@ +prometheus_client diff --git a/stat-pusher/run_stat_pusher.sh b/stat-pusher/run_stat_pusher.sh new file mode 100755 index 0000000..d720e65 --- /dev/null +++ b/stat-pusher/run_stat_pusher.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# if wa-internal envars needed, source file +source ~/gitlab/ukwa-monitor/monitoring.sh + +# setup venv +#### venv created via 'virtualenv -p /usr/local/bin/python3.7 venv' +export PYTHONPATH=~/github/ukwa-monitor/stat-pusher +source $PYTHONPATH/venv/bin/activate +cd $PYTHONPATH + +# ensure python libraries installed +pip install -r requirements.txt + +# run stat-pusher script +python update_pushgateway_stats.py diff --git a/stat-pusher/update_pushgateway_stats.py b/stat-pusher/update_pushgateway_stats.py new file mode 100755 index 0000000..379242f --- /dev/null +++ b/stat-pusher/update_pushgateway_stats.py @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +''' + +''' + +from prometheus_client import start_http_server, Summary + + +# main ---------------------------------------- +def main(): + print('Fin') + + +if __name__ == '__main__': + main() From f92f81092c4b250ea8dd12894d3f7783f0fd1d2d Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Thu, 13 Aug 2020 09:37:22 +0100 Subject: [PATCH 02/11] configured logging --- stat-pusher/common/__init__.py | 1 + stat-pusher/common/log.py | 8 ++++++++ stat-pusher/update_pushgateway_stats.py | 13 ++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 stat-pusher/common/__init__.py create mode 100644 stat-pusher/common/log.py diff --git a/stat-pusher/common/__init__.py b/stat-pusher/common/__init__.py new file mode 100644 index 0000000..3921af1 --- /dev/null +++ b/stat-pusher/common/__init__.py @@ -0,0 +1 @@ +import common.log diff --git a/stat-pusher/common/log.py b/stat-pusher/common/log.py new file mode 100644 index 0000000..759e1c5 --- /dev/null +++ b/stat-pusher/common/log.py @@ -0,0 +1,8 @@ +''' +Common/non-script specific functions +''' + +import logging + +def configure(lvl='INFO'): + logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s', level=lvl) diff --git a/stat-pusher/update_pushgateway_stats.py b/stat-pusher/update_pushgateway_stats.py index 379242f..d2d1d2e 100755 --- a/stat-pusher/update_pushgateway_stats.py +++ b/stat-pusher/update_pushgateway_stats.py @@ -1,14 +1,21 @@ -#!/usr/bin/env bash +#!/usr/bin/env python ''' - +Script to gather WA bespoke service stats and upload to push gateway service ''' +import logging + from prometheus_client import start_http_server, Summary +# script packages +from common import log +#import common # main ---------------------------------------- def main(): - print('Fin') + log.configure(lvl='DEBUG') + logging.info('Start ---------------------') + logging.info('Fin') if __name__ == '__main__': From b89b0eccbf44110a6779ffff679ed0ccf8d4cc9d Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Thu, 13 Aug 2020 12:15:55 +0100 Subject: [PATCH 03/11] Settings per environ --- stat-pusher/run_stat_pusher.sh | 9 ++++++- stat-pusher/script/args.py | 28 +++++++++++++++++++ stat-pusher/script/settings.py | 36 +++++++++++++++++++++++++ stat-pusher/settings | 8 ++++++ stat-pusher/update_pushgateway_stats.py | 16 ++++++++--- 5 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 stat-pusher/script/args.py create mode 100644 stat-pusher/script/settings.py create mode 100644 stat-pusher/settings diff --git a/stat-pusher/run_stat_pusher.sh b/stat-pusher/run_stat_pusher.sh index d720e65..503c28f 100755 --- a/stat-pusher/run_stat_pusher.sh +++ b/stat-pusher/run_stat_pusher.sh @@ -1,5 +1,12 @@ #!/usr/bin/env bash +# read script environ argument +ENVIRON=$1 +if ! [[ ${ENVIRON} =~ dev|beta|prod ]]; then + echo "ERROR: Script $0 requires environment argument" + exit +fi + # if wa-internal envars needed, source file source ~/gitlab/ukwa-monitor/monitoring.sh @@ -13,4 +20,4 @@ cd $PYTHONPATH pip install -r requirements.txt # run stat-pusher script -python update_pushgateway_stats.py +python update_pushgateway_stats.py ${ENVIRON} diff --git a/stat-pusher/script/args.py b/stat-pusher/script/args.py new file mode 100644 index 0000000..b630282 --- /dev/null +++ b/stat-pusher/script/args.py @@ -0,0 +1,28 @@ +''' +script arguments handling +Only expected argument identifies script environ, to allow different settings +for the different development environmens +''' + +import sys +import logging + +def passed(): + environ = '' + if len(sys.argv) == 2: + environ = sys.argv[1] + logging.debug("Script argument [{}]".format(environ)) + + # test environ value + if environ == 'dev' or environ == 'beta' or environ == 'prod': + pass + else: + logging.error("Script environ argument not recognised [{}]".format(environ)) + sys.exit() + + else: + logging.error("Script environ argument not identified") + logging.error("sys.argv [{}]".format(sys.argv)) + sys.exit() + + return environ diff --git a/stat-pusher/script/settings.py b/stat-pusher/script/settings.py new file mode 100644 index 0000000..2882803 --- /dev/null +++ b/stat-pusher/script/settings.py @@ -0,0 +1,36 @@ +import os +import sys +import logging +import configparser + +stgFile = 'settings' +environ = '' + +# functions ------------------------ +def read(env='dev'): + global stgFile + global environ + cfg = configparser.ConfigParser() + + # test settings file exists + if os.path.isfile(stgFile): + logging.debug("Reading [{}] settings".format(env)) + # read environ settings file + cfg.read(stgFile) + + if env in cfg.sections(): + environ = cfg[env] + else: + logging.error("[{}] settings file missing".format(stgFile)) + sys.exit() + + logging.info("Using {} environment settings".format(env)) + +def get(key): + global environ + if key in environ: + logging.debug("setting {}: {}".format(key, environ[key])) + return environ[key] + else: + logging.error("No cfg key [{}] declared".format(key)) + sys.exit() diff --git a/stat-pusher/settings b/stat-pusher/settings new file mode 100644 index 0000000..1534f2e --- /dev/null +++ b/stat-pusher/settings @@ -0,0 +1,8 @@ +[dev] +pushgtw = http://monitor-pushgateway.dapi.wa.bl.uk + +[beta] +pushgtw = http://monitor-pushgateway.bapi.wa.bl.uk + +[prod] +pushgtw = http://monitor-pushgateway.api.wa.bl.uk diff --git a/stat-pusher/update_pushgateway_stats.py b/stat-pusher/update_pushgateway_stats.py index d2d1d2e..496c51e 100755 --- a/stat-pusher/update_pushgateway_stats.py +++ b/stat-pusher/update_pushgateway_stats.py @@ -3,18 +3,28 @@ Script to gather WA bespoke service stats and upload to push gateway service ''' +# python libraries import logging - -from prometheus_client import start_http_server, Summary +from prometheus_client import CollectorRegistry, Gauge, push_to_gateway # script packages from common import log -#import common +from script import args, settings + # main ---------------------------------------- def main(): log.configure(lvl='DEBUG') logging.info('Start ---------------------') + + + # get script environ argument + environ = args.passed() + + # read environment settings + settings.read(env=environ) + + logging.info('Fin') From 852fc90acc628921a8cb43bbe713ea09c949b82d Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Thu, 13 Aug 2020 17:23:12 +0100 Subject: [PATCH 04/11] uploading duff values to pushgateway --- stat-pusher/dev.tests | 0 stat-pusher/settings | 1 + stat-pusher/update_pushgateway_stats.py | 16 ++++++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 stat-pusher/dev.tests diff --git a/stat-pusher/dev.tests b/stat-pusher/dev.tests new file mode 100644 index 0000000..e69de29 diff --git a/stat-pusher/settings b/stat-pusher/settings index 1534f2e..bb65a55 100644 --- a/stat-pusher/settings +++ b/stat-pusher/settings @@ -1,5 +1,6 @@ [dev] pushgtw = http://monitor-pushgateway.dapi.wa.bl.uk +testsfile = dev.tests [beta] pushgtw = http://monitor-pushgateway.bapi.wa.bl.uk diff --git a/stat-pusher/update_pushgateway_stats.py b/stat-pusher/update_pushgateway_stats.py index 496c51e..a6c2292 100755 --- a/stat-pusher/update_pushgateway_stats.py +++ b/stat-pusher/update_pushgateway_stats.py @@ -24,6 +24,22 @@ def main(): # read environment settings settings.read(env=environ) + # loop through wa service stats + + # declare registry, inside loop for service + registry = CollectorRegistry() + + # set/get stat values + statJob = 'gilh' + statName = statJob + '_' + 'unixtime' + statDesc = 'set to current unix time' + statValue = 824 + g = Gauge(statName, statDesc, registry=registry) + g.set(statValue) + logging.debug("Added job [{}] statName [{}] statValue [{}]".format(statJob, statName, statValue)) + + # upload to push gateway + push_to_gateway(settings.get('pushgtw'), registry=registry, job=statJob) logging.info('Fin') From 6ebc4c8f08937ba52b62bdda642e029c2b6dc7e1 Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Fri, 14 Aug 2020 10:16:24 +0100 Subject: [PATCH 05/11] Added prometheus labels to stat push --- stat-pusher/update_pushgateway_stats.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/stat-pusher/update_pushgateway_stats.py b/stat-pusher/update_pushgateway_stats.py index a6c2292..ef61ba9 100755 --- a/stat-pusher/update_pushgateway_stats.py +++ b/stat-pusher/update_pushgateway_stats.py @@ -31,15 +31,19 @@ def main(): # set/get stat values statJob = 'gilh' + statHost='solr8' + statLabel='asdf' statName = statJob + '_' + 'unixtime' statDesc = 'set to current unix time' statValue = 824 - g = Gauge(statName, statDesc, registry=registry) - g.set(statValue) - logging.debug("Added job [{}] statName [{}] statValue [{}]".format(statJob, statName, statValue)) + + g = Gauge(statName, statDesc, labelnames=['instance','label'], registry=registry) + g.labels(instance=statHost,label=statLabel).set(statValue) + logging.debug("Added job [{}] statHost [{}] statName [{}] statValue [{}]".format(statJob, statHost, statName, statValue)) # upload to push gateway push_to_gateway(settings.get('pushgtw'), registry=registry, job=statJob) + logging.info("Uploaded job {}".format(statJob)) logging.info('Fin') From 9e3a4801b056e2134acf7a892a7e160bacdd0d3f Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Fri, 14 Aug 2020 11:18:02 +0100 Subject: [PATCH 06/11] file shuffling --- stat-pusher/dev.stats | 13 +++++++++++++ stat-pusher/dev.tests | 0 stat-pusher/settings | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 stat-pusher/dev.stats delete mode 100644 stat-pusher/dev.tests diff --git a/stat-pusher/dev.stats b/stat-pusher/dev.stats new file mode 100644 index 0000000..81eb830 --- /dev/null +++ b/stat-pusher/dev.stats @@ -0,0 +1,13 @@ +{ + 'job': 'trackdb', + 'stats': [ + { + 'name': 'refresh_timestamp', + 'host': 'solr8', + 'label': '', + 'desc': 'Most recent trackdb refresh_timestamp', + 'uri': 'http://solr8.api.wa.bl.uk/solr/tracking/select?q=*:*&sort=refresh_date_dt%20desc&wt=json', + 'match': 'refresh_date_dt' + } + ] +} diff --git a/stat-pusher/dev.tests b/stat-pusher/dev.tests deleted file mode 100644 index e69de29..0000000 diff --git a/stat-pusher/settings b/stat-pusher/settings index bb65a55..bef01c0 100644 --- a/stat-pusher/settings +++ b/stat-pusher/settings @@ -1,6 +1,6 @@ [dev] pushgtw = http://monitor-pushgateway.dapi.wa.bl.uk -testsfile = dev.tests +statsfile = dev.stats [beta] pushgtw = http://monitor-pushgateway.bapi.wa.bl.uk From 2eaf9f50adaa7b5ff8f5780e45759b43bea62eb6 Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Fri, 14 Aug 2020 18:22:17 +0100 Subject: [PATCH 07/11] Submitting numbers into prometheus; working on dates --- stat-pusher/dev.stats | 27 ++++++----- stat-pusher/requirements.txt | 4 ++ stat-pusher/script/stat_values.py | 58 ++++++++++++++++++++++++ stat-pusher/update_pushgateway_stats.py | 59 ++++++++++++++++++------- 4 files changed, 121 insertions(+), 27 deletions(-) create mode 100644 stat-pusher/script/stat_values.py diff --git a/stat-pusher/dev.stats b/stat-pusher/dev.stats index 81eb830..485642a 100644 --- a/stat-pusher/dev.stats +++ b/stat-pusher/dev.stats @@ -1,13 +1,20 @@ { - 'job': 'trackdb', - 'stats': [ - { - 'name': 'refresh_timestamp', - 'host': 'solr8', - 'label': '', - 'desc': 'Most recent trackdb refresh_timestamp', - 'uri': 'http://solr8.api.wa.bl.uk/solr/tracking/select?q=*:*&sort=refresh_date_dt%20desc&wt=json', - 'match': 'refresh_date_dt' + "trackdb": { + "numFound": { + "host": "solr8", + "label": "", + "desc": "Number of records in trackdb collection", + "kind": "json", + "uri": "http://solr8.api.wa.bl.uk/solr/tracking/select?q=*:*&wt=json", + "match": "['response','numFound']" + }, + "refresh_timestamp": { + "host": "solr8", + "label": "", + "desc": "Most recent trackdb refresh_timestamp", + "uri": "http://solr8.api.wa.bl.uk/solr/tracking/select?q=*:*&sort=refresh_date_dt%20desc&wt=json", + "kind": "json", + "match": "['response','docs','refresh_date_dt']" } - ] + } } diff --git a/stat-pusher/requirements.txt b/stat-pusher/requirements.txt index a0753df..59d68f2 100644 --- a/stat-pusher/requirements.txt +++ b/stat-pusher/requirements.txt @@ -1 +1,5 @@ prometheus_client +requests +python_dateutil +calendar +datetime diff --git a/stat-pusher/script/stat_values.py b/stat-pusher/script/stat_values.py new file mode 100644 index 0000000..75e4305 --- /dev/null +++ b/stat-pusher/script/stat_values.py @@ -0,0 +1,58 @@ +''' +get stat value +Depending on service and data return, method of getting stat value expected to differ. +This module is to cater for each variant. +''' + +import logging +import ast +import requests +import sys +import dateutil.parser +import calendar + +def get_json_value(uri, match): + logging.debug("uri [{}]".format(uri)) + + # convert match string into list, to traverse uri json response + matchList = ast.literal_eval(match) + logging.debug("matchList [{}] type [{}]".format(matchList, type(matchList))) + + # get response + try: + r = requests.get(uri) + response = r.json() + except Exception as e: + logging.error("Failed to get [{}]\n[{}]".format(uri, e)) + sys.exit() + + #### NEED TO ENSURE RESPONSE IS SUCCESSFUL ######################################## + + + # extract value + for k in matchList: + if k in response: + response = response[k] + elif k in response[0]: + response = response[0][k] + else: + logging.error("match key [{}] not found in uri {}\njson [{}]".format(k, uri, response)) + sys.exit() + logging.debug("response [{}] type [{}]".format(response, type(response))) + + # ensure numerical value + if type(response) is not int: + if type(response) is float: + response = int(response) + + # if value is a timestamp, get unixtime + #### NEED TO WORK OUT PROCESSING OF STRING INTO UNIXTIME ############################ + elif dateutil.parser.parse(response): + dst = calendar.timegm(dateutil.parser.parse(response)) + + logging.debug("date string [{}] dst [{}]".format(response, type(dst))) + response = 42 + else: + response = 4 + + return response diff --git a/stat-pusher/update_pushgateway_stats.py b/stat-pusher/update_pushgateway_stats.py index ef61ba9..d38c578 100755 --- a/stat-pusher/update_pushgateway_stats.py +++ b/stat-pusher/update_pushgateway_stats.py @@ -5,11 +5,14 @@ # python libraries import logging +import os +import sys +import json from prometheus_client import CollectorRegistry, Gauge, push_to_gateway # script packages from common import log -from script import args, settings +from script import args, settings, stat_values # main ---------------------------------------- @@ -24,26 +27,48 @@ def main(): # read environment settings settings.read(env=environ) - # loop through wa service stats + # read stats file + statTests = '' + if os.path.isfile(settings.get('statsfile')): + try: + with open(settings.get('statsfile'), 'r') as infile: + statTests = json.load(infile) + except Exception as e: + logging.error("Failed to read statsfile [{}]\n[{}]".format(settings.get('statsfile'), e)) + sys.exit() + else: + logging.error("statsfile [{}] to test for [{}] environment missing".format(settings.get('statsfile'), environ)) + sys.exit() # declare registry, inside loop for service registry = CollectorRegistry() - # set/get stat values - statJob = 'gilh' - statHost='solr8' - statLabel='asdf' - statName = statJob + '_' + 'unixtime' - statDesc = 'set to current unix time' - statValue = 824 - - g = Gauge(statName, statDesc, labelnames=['instance','label'], registry=registry) - g.labels(instance=statHost,label=statLabel).set(statValue) - logging.debug("Added job [{}] statHost [{}] statName [{}] statValue [{}]".format(statJob, statHost, statName, statValue)) - - # upload to push gateway - push_to_gateway(settings.get('pushgtw'), registry=registry, job=statJob) - logging.info("Uploaded job {}".format(statJob)) + # loop through wa service stats + for job in statTests: + for stat in statTests[job]: + try: + name = job + '_' + stat + host = statTests[job][stat]['host'] + label = statTests[job][stat]['label'] + desc = statTests[job][stat]['desc'] + kind = statTests[job][stat]['kind'] + uri = statTests[job][stat]['uri'] + match = statTests[job][stat]['match'] + except Exception as e: + logging.error("Children of job [{}] stat [{}] missing\n[{}]".format(job, stat, e)) + sys.exit() + + # get stat value + if kind == 'json': + value = stat_values.get_json_value(uri, match) + + g = Gauge(name, desc, labelnames=['instance','label'], registry=registry) + g.labels(instance=host,label=label).set(value) + logging.debug("Added job [{}] host [{}] name [{}] value [{}]".format(job, host, name, value)) + + # upload to push gateway + push_to_gateway(settings.get('pushgtw'), registry=registry, job=job) + logging.info("Uploaded {} {} {}".format(job, stat, value)) logging.info('Fin') From 6a9c03baf4f4343925f48644727c45822673fe9f Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Mon, 17 Aug 2020 11:08:28 +0100 Subject: [PATCH 08/11] Shifted to f-strings --- stat-pusher/script/args.py | 6 +++--- stat-pusher/script/settings.py | 13 ++++++++----- stat-pusher/script/stat_values.py | 25 ++++++++++++++----------- stat-pusher/update_pushgateway_stats.py | 12 +++++++----- 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/stat-pusher/script/args.py b/stat-pusher/script/args.py index b630282..7857de0 100644 --- a/stat-pusher/script/args.py +++ b/stat-pusher/script/args.py @@ -11,18 +11,18 @@ def passed(): environ = '' if len(sys.argv) == 2: environ = sys.argv[1] - logging.debug("Script argument [{}]".format(environ)) + logging.debug(f"Script argument [{environ}]") # test environ value if environ == 'dev' or environ == 'beta' or environ == 'prod': pass else: - logging.error("Script environ argument not recognised [{}]".format(environ)) + logging.error(f"Script environ argument not recognised [{environ}]") sys.exit() else: logging.error("Script environ argument not identified") - logging.error("sys.argv [{}]".format(sys.argv)) + logging.error(f"sys.argv [{sys.argv}]") sys.exit() return environ diff --git a/stat-pusher/script/settings.py b/stat-pusher/script/settings.py index 2882803..496d963 100644 --- a/stat-pusher/script/settings.py +++ b/stat-pusher/script/settings.py @@ -14,23 +14,26 @@ def read(env='dev'): # test settings file exists if os.path.isfile(stgFile): - logging.debug("Reading [{}] settings".format(env)) + logging.debug(f"Reading [{env}] settings") # read environ settings file cfg.read(stgFile) if env in cfg.sections(): environ = cfg[env] + else: + logging.error(f"[{env}] section missing from [{stgFile}] settings file") + sys.exit() else: - logging.error("[{}] settings file missing".format(stgFile)) + logging.error(f"[{stgFile}] settings file missing") sys.exit() - logging.info("Using {} environment settings".format(env)) + logging.info(f"Using {env} environment settings") def get(key): global environ if key in environ: - logging.debug("setting {}: {}".format(key, environ[key])) + logging.debug(f"setting {key}: [{environ[key]}]") return environ[key] else: - logging.error("No cfg key [{}] declared".format(key)) + logging.error(f"No cfg key [{key}] declared") sys.exit() diff --git a/stat-pusher/script/stat_values.py b/stat-pusher/script/stat_values.py index 75e4305..4b6062a 100644 --- a/stat-pusher/script/stat_values.py +++ b/stat-pusher/script/stat_values.py @@ -12,23 +12,25 @@ import calendar def get_json_value(uri, match): - logging.debug("uri [{}]".format(uri)) + logging.debug(f"uri [{uri}]") # convert match string into list, to traverse uri json response matchList = ast.literal_eval(match) - logging.debug("matchList [{}] type [{}]".format(matchList, type(matchList))) + logging.debug(f"matchList [{matchList}] type [{type(matchList)}]") # get response try: r = requests.get(uri) + logging.debug(f"Response code [{r.status_code}]") + r.raise_for_status() response = r.json() + except HTTPError as he: + logging.error(f"HTTP error trying to get [{uri}]\n[{he}]") + sys.exit() except Exception as e: - logging.error("Failed to get [{}]\n[{}]".format(uri, e)) + logging.error(f"Failed to get [{uri}]\n[{e}]") sys.exit() - #### NEED TO ENSURE RESPONSE IS SUCCESSFUL ######################################## - - # extract value for k in matchList: if k in response: @@ -36,9 +38,9 @@ def get_json_value(uri, match): elif k in response[0]: response = response[0][k] else: - logging.error("match key [{}] not found in uri {}\njson [{}]".format(k, uri, response)) + logging.error(f"match key [{k}] not found in uri {uri}\njson [{response}]") sys.exit() - logging.debug("response [{}] type [{}]".format(response, type(response))) + logging.debug(f"Value [{response}] type [{type(response)}]") # ensure numerical value if type(response) is not int: @@ -48,11 +50,12 @@ def get_json_value(uri, match): # if value is a timestamp, get unixtime #### NEED TO WORK OUT PROCESSING OF STRING INTO UNIXTIME ############################ elif dateutil.parser.parse(response): - dst = calendar.timegm(dateutil.parser.parse(response)) + #dst = calendar.timegm(dateutil.parser.parse(response)) - logging.debug("date string [{}] dst [{}]".format(response, type(dst))) + #logging.debug(f"date string [{response}] dst [{type(dst)}]") response = 42 else: - response = 4 + logging.error(f"Value [{response}] type [{type(response)}] not convertible to numeric") + sys.exit() return response diff --git a/stat-pusher/update_pushgateway_stats.py b/stat-pusher/update_pushgateway_stats.py index d38c578..ff59a6a 100755 --- a/stat-pusher/update_pushgateway_stats.py +++ b/stat-pusher/update_pushgateway_stats.py @@ -34,10 +34,10 @@ def main(): with open(settings.get('statsfile'), 'r') as infile: statTests = json.load(infile) except Exception as e: - logging.error("Failed to read statsfile [{}]\n[{}]".format(settings.get('statsfile'), e)) + logging.error(f"Failed to read statsfile [{settings.get('statsfile')}]\n[{e}]") sys.exit() else: - logging.error("statsfile [{}] to test for [{}] environment missing".format(settings.get('statsfile'), environ)) + logging.error(f"statsfile [{settings.get('statsfile')}] to test for [{environ}] environment missing") sys.exit() # declare registry, inside loop for service @@ -46,6 +46,7 @@ def main(): # loop through wa service stats for job in statTests: for stat in statTests[job]: + # get stat details try: name = job + '_' + stat host = statTests[job][stat]['host'] @@ -55,20 +56,21 @@ def main(): uri = statTests[job][stat]['uri'] match = statTests[job][stat]['match'] except Exception as e: - logging.error("Children of job [{}] stat [{}] missing\n[{}]".format(job, stat, e)) + logging.error(f"Children of job [{job}] stat [{stat}] missing\n[{e}]") sys.exit() # get stat value if kind == 'json': value = stat_values.get_json_value(uri, match) + # set pushgateway submission details g = Gauge(name, desc, labelnames=['instance','label'], registry=registry) g.labels(instance=host,label=label).set(value) - logging.debug("Added job [{}] host [{}] name [{}] value [{}]".format(job, host, name, value)) + logging.debug(f"Added job [{job}] host [{host}] name [{name}] value [{value}]") # upload to push gateway push_to_gateway(settings.get('pushgtw'), registry=registry, job=job) - logging.info("Uploaded {} {} {}".format(job, stat, value)) + logging.info(f"Uploaded {job} {stat} {value}") logging.info('Fin') From 94d0b81859544c9ec93bcf900133fb955cf20859 Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Mon, 17 Aug 2020 12:14:25 +0100 Subject: [PATCH 09/11] Solved datestamps; now submitting trackdb refresh_timestamp --- stat-pusher/requirements.txt | 1 - stat-pusher/script/stat_values.py | 40 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/stat-pusher/requirements.txt b/stat-pusher/requirements.txt index 59d68f2..21b1c2c 100644 --- a/stat-pusher/requirements.txt +++ b/stat-pusher/requirements.txt @@ -1,5 +1,4 @@ prometheus_client requests python_dateutil -calendar datetime diff --git a/stat-pusher/script/stat_values.py b/stat-pusher/script/stat_values.py index 4b6062a..218172a 100644 --- a/stat-pusher/script/stat_values.py +++ b/stat-pusher/script/stat_values.py @@ -9,7 +9,7 @@ import requests import sys import dateutil.parser -import calendar +import datetime def get_json_value(uri, match): logging.debug(f"uri [{uri}]") @@ -32,30 +32,32 @@ def get_json_value(uri, match): sys.exit() # extract value + value = response for k in matchList: - if k in response: - response = response[k] - elif k in response[0]: - response = response[0][k] + if k in value: + value = value[k] + elif k in value[0]: + value = value[0][k] else: - logging.error(f"match key [{k}] not found in uri {uri}\njson [{response}]") + logging.error(f"match key [{k}] not found in uri {uri}\njson [{value}]") sys.exit() - logging.debug(f"Value [{response}] type [{type(response)}]") + logging.debug(f"Value [{value}] type [{type(value)}]") # ensure numerical value - if type(response) is not int: - if type(response) is float: - response = int(response) - + if type(value) is not int and type(value) is not float: # if value is a timestamp, get unixtime - #### NEED TO WORK OUT PROCESSING OF STRING INTO UNIXTIME ############################ - elif dateutil.parser.parse(response): - #dst = calendar.timegm(dateutil.parser.parse(response)) - - #logging.debug(f"date string [{response}] dst [{type(dst)}]") - response = 42 + dt = None + try: + dt = dateutil.parser.parse(value) + except Exception as e: + logging.error(f"Value [{value}] type [{type(value)}] not recognised as datestamp") + sys.exit() + if isinstance(dt, datetime.datetime): + logging.debug(f"timestamp dt [{dt}] type [{type(dt)}]") + value = dt.timestamp() + logging.debug(f"Value epoch [{value}]") else: - logging.error(f"Value [{response}] type [{type(response)}] not convertible to numeric") + logging.error(f"Value [{value}] type [{type(value)}] not convertible to numeric") sys.exit() - return response + return value From f51f0eaf9493999f25c3fb4336cfc85ff3eed123 Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Mon, 17 Aug 2020 14:53:44 +0100 Subject: [PATCH 10/11] Added trackdb_daily_refresh alert --- monitor/prometheus/alert.rules.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/monitor/prometheus/alert.rules.yml b/monitor/prometheus/alert.rules.yml index 4ac12cc..1e74cfb 100644 --- a/monitor/prometheus/alert.rules.yml +++ b/monitor/prometheus/alert.rules.yml @@ -1,4 +1,15 @@ groups: +- name: trackdb + rules: + - alert: trackdb_daily_refresh_has_not_run + expr: (time() - trackdb_refresh_timestamp) / (60*60) > 24 + for: 1h + labels: + severity: severe + annotations: + summary: "TrackDB not updated in last 24 hours" + description: "{{ $labels.instance }} of {{ $labels.job }} hasn't changed in over 24 hours" + - name: Core metrics rules: From 85a0c629078f36cfac364066981dc70559e90809 Mon Sep 17 00:00:00 2001 From: Gil Hoggarth Date: Tue, 18 Aug 2020 14:33:37 +0100 Subject: [PATCH 11/11] trackdb alert and report --- .../generate_wa-status_dashboard.py | 4 + .../provisioning/dashboards/wa_status.json | 111 +++++++++++++++--- .../dashboards/wa_status.json-template | 111 +++++++++++++++--- 3 files changed, 194 insertions(+), 32 deletions(-) diff --git a/monitor/grafana/provisioning/_dashboard_generator/generate_wa-status_dashboard.py b/monitor/grafana/provisioning/_dashboard_generator/generate_wa-status_dashboard.py index 85f66c1..256cbd8 100755 --- a/monitor/grafana/provisioning/_dashboard_generator/generate_wa-status_dashboard.py +++ b/monitor/grafana/provisioning/_dashboard_generator/generate_wa-status_dashboard.py @@ -105,6 +105,9 @@ def replace_output_single(outHandle, **kwargs): elif kwargs['title'] == 'WWW' or kwargs['title'] == 'Query': expr = 'count(probe_http_status_code{job=\\"' + kwargs['job'] + '\\"} != 200) OR vector(0)' templateCode = templateCode.replace('', expr) + elif kwargs['title'] == 'trackdb': + expr = '((time() - trackdb_refresh_timestamp) / (60*60) > 24) OR vector(0)' + templateCode = templateCode.replace('', expr) # remove last comma if last panel if 'lastPanel' in kwargs: @@ -155,6 +158,7 @@ def main(): replace_output_single(outHandle, tmpFl=panelSingle, job='services', title='CPU', h=2, w=2, x=2, y=6) replace_output_single(outHandle, tmpFl=panelSingle, job='services', title='Dsk', h=2, w=2, x=4, y=6) replace_output_single(outHandle, tmpFl=panelSingle, job='services', title='Mem', h=2, w=2, x=6, y=6) + replace_output_single(outHandle, tmpFl=panelSingle, job='services', title='trackdb', h=2, w=2, x=0, y=8) replace_output_title(outHandle, tmpFl=panelTitle, job='gluster', title='Gluster', h=1, w=8, x=8, y=6) replace_output_single(outHandle, tmpFl=panelSingle, job='gluster', title='Up', h=2, w=2, x=8, y=7) replace_output_single(outHandle, tmpFl=panelSingle, job='gluster', title='CPU', h=2, w=2, x=10, y=7) diff --git a/monitor/grafana/provisioning/dashboards/wa_status.json b/monitor/grafana/provisioning/dashboards/wa_status.json index 25812fa..fe0840c 100644 --- a/monitor/grafana/provisioning/dashboards/wa_status.json +++ b/monitor/grafana/provisioning/dashboards/wa_status.json @@ -1812,6 +1812,85 @@ ], "valueName": "current" }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 0, + "y": 8 + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "((time() - trackdb_refresh_timestamp) / (60*60) > 24) OR vector(0)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.1,1", + "title": "trackdb", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, { "content": "", "gridPos": { @@ -1820,7 +1899,7 @@ "x": 8, "y": 6 }, - "id": 27, + "id": 28, "links": [], "mode": "markdown", "title": "Gluster", @@ -1849,7 +1928,7 @@ "x": 8, "y": 7 }, - "id": 28, + "id": 29, "interval": null, "links": [], "mappingType": 1, @@ -1928,7 +2007,7 @@ "x": 10, "y": 7 }, - "id": 29, + "id": 30, "interval": null, "links": [], "mappingType": 1, @@ -2007,7 +2086,7 @@ "x": 12, "y": 7 }, - "id": 30, + "id": 31, "interval": null, "links": [], "mappingType": 1, @@ -2086,7 +2165,7 @@ "x": 14, "y": 7 }, - "id": 31, + "id": 32, "interval": null, "links": [], "mappingType": 1, @@ -2150,7 +2229,7 @@ "x": 16, "y": 5 }, - "id": 32, + "id": 33, "links": [], "mode": "markdown", "title": "Solr", @@ -2179,7 +2258,7 @@ "x": 16, "y": 6 }, - "id": 33, + "id": 34, "interval": null, "links": [], "mappingType": 1, @@ -2258,7 +2337,7 @@ "x": 18, "y": 6 }, - "id": 34, + "id": 35, "interval": null, "links": [], "mappingType": 1, @@ -2337,7 +2416,7 @@ "x": 20, "y": 6 }, - "id": 35, + "id": 36, "interval": null, "links": [], "mappingType": 1, @@ -2416,7 +2495,7 @@ "x": 22, "y": 6 }, - "id": 36, + "id": 37, "interval": null, "links": [], "mappingType": 1, @@ -2495,7 +2574,7 @@ "x": 16, "y": 8 }, - "id": 37, + "id": 38, "interval": null, "links": [], "mappingType": 1, @@ -2559,7 +2638,7 @@ "x": 8, "y": 9 }, - "id": 38, + "id": 39, "links": [], "mode": "markdown", "title": "Infrastructure", @@ -2588,7 +2667,7 @@ "x": 8, "y": 10 }, - "id": 39, + "id": 40, "interval": null, "links": [], "mappingType": 1, @@ -2667,7 +2746,7 @@ "x": 10, "y": 10 }, - "id": 40, + "id": 41, "interval": null, "links": [], "mappingType": 1, @@ -2746,7 +2825,7 @@ "x": 12, "y": 10 }, - "id": 41, + "id": 42, "interval": null, "links": [], "mappingType": 1, @@ -2825,7 +2904,7 @@ "x": 14, "y": 10 }, - "id": 42, + "id": 43, "interval": null, "links": [], "mappingType": 1, diff --git a/monitor/grafana/provisioning/dashboards/wa_status.json-template b/monitor/grafana/provisioning/dashboards/wa_status.json-template index f770e28..bef732f 100644 --- a/monitor/grafana/provisioning/dashboards/wa_status.json-template +++ b/monitor/grafana/provisioning/dashboards/wa_status.json-template @@ -1812,6 +1812,85 @@ ], "valueName": "current" }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 0, + "y": 8 + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "((time() - trackdb_refresh_timestamp) / (60*60) > 24) OR vector(0)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.1,1", + "title": "trackdb", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, { "content": "", "gridPos": { @@ -1820,7 +1899,7 @@ "x": 8, "y": 6 }, - "id": 27, + "id": 28, "links": [], "mode": "markdown", "title": "Gluster", @@ -1849,7 +1928,7 @@ "x": 8, "y": 7 }, - "id": 28, + "id": 29, "interval": null, "links": [], "mappingType": 1, @@ -1928,7 +2007,7 @@ "x": 10, "y": 7 }, - "id": 29, + "id": 30, "interval": null, "links": [], "mappingType": 1, @@ -2007,7 +2086,7 @@ "x": 12, "y": 7 }, - "id": 30, + "id": 31, "interval": null, "links": [], "mappingType": 1, @@ -2086,7 +2165,7 @@ "x": 14, "y": 7 }, - "id": 31, + "id": 32, "interval": null, "links": [], "mappingType": 1, @@ -2150,7 +2229,7 @@ "x": 16, "y": 5 }, - "id": 32, + "id": 33, "links": [], "mode": "markdown", "title": "Solr", @@ -2179,7 +2258,7 @@ "x": 16, "y": 6 }, - "id": 33, + "id": 34, "interval": null, "links": [], "mappingType": 1, @@ -2258,7 +2337,7 @@ "x": 18, "y": 6 }, - "id": 34, + "id": 35, "interval": null, "links": [], "mappingType": 1, @@ -2337,7 +2416,7 @@ "x": 20, "y": 6 }, - "id": 35, + "id": 36, "interval": null, "links": [], "mappingType": 1, @@ -2416,7 +2495,7 @@ "x": 22, "y": 6 }, - "id": 36, + "id": 37, "interval": null, "links": [], "mappingType": 1, @@ -2495,7 +2574,7 @@ "x": 16, "y": 8 }, - "id": 37, + "id": 38, "interval": null, "links": [], "mappingType": 1, @@ -2559,7 +2638,7 @@ "x": 8, "y": 9 }, - "id": 38, + "id": 39, "links": [], "mode": "markdown", "title": "Infrastructure", @@ -2588,7 +2667,7 @@ "x": 8, "y": 10 }, - "id": 39, + "id": 40, "interval": null, "links": [], "mappingType": 1, @@ -2667,7 +2746,7 @@ "x": 10, "y": 10 }, - "id": 40, + "id": 41, "interval": null, "links": [], "mappingType": 1, @@ -2746,7 +2825,7 @@ "x": 12, "y": 10 }, - "id": 41, + "id": 42, "interval": null, "links": [], "mappingType": 1, @@ -2825,7 +2904,7 @@ "x": 14, "y": 10 }, - "id": 42, + "id": 43, "interval": null, "links": [], "mappingType": 1,