Skip to content

Commit

Permalink
YDA-5840 add monitoring thread to portal
Browse files Browse the repository at this point in the history
Add monitoring thread to the Yoda portal for collecting technical
support information in case of problems.
  • Loading branch information
stsnel committed Jul 31, 2024
1 parent 001631d commit 400bf4f
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 3 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "Python lint and unit tests"
name: "Python lint and unit/integration tests"

on: [push, pull_request]

Expand Down Expand Up @@ -29,8 +29,10 @@ jobs:
run: |
mypy . --explicit-package-bases || true
- name: Run unit tests
- name: Run unit and integration tests
run: |
sudo apt -y install apache2-dev
python -m pip install -r requirements.txt
cd unit-tests
python -m unittest
Expand Down
8 changes: 8 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from general.general import general_bp
from group_manager.group_manager import group_manager_bp
from intake.intake import intake_bp
from monitor import Monitor
from open_search.open_search import open_search_bp
from research.research import research_bp
from search.search import search_bp
Expand Down Expand Up @@ -127,6 +128,13 @@ def load_admin_setting() -> Dict[str, Any]:
# Start Flask-Session
Session(app)

# Start monitoring thread for extracting tech support information
with app.app_context():
    # Monitor signal file can be set to empty to completely disable monitor thread.
    # When the setting is absent, the non-empty default path below is used, so the
    # monitor is started unless MONITOR_SIGNAL_FILE is explicitly set to "".
    if app.config.get("MONITOR_SIGNAL_FILE", "/var/www/yoda/show-tech.sig") != "":
        # The monitor receives the application config and reads its own
        # settings (signal file, output directory, version) from it.
        monitor_thread = Monitor(app.config)
        monitor_thread.start()

# Register blueprints
with app.app_context():
app.register_blueprint(general_bp)
Expand Down
86 changes: 86 additions & 0 deletions monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env python3

"""This class implements a monitoring thread that can be used to collect technical
support information in case of problems with the portal."""

__copyright__ = 'Copyright (c) 2021-2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'

import os
import socket
import sys
import traceback
from datetime import datetime
from io import StringIO
from threading import Timer

import flask
import humanize
import psutil


class Monitor(Timer):
    """Background thread that writes tech support reports on demand.

    Once started, the thread checks every ``interval`` seconds whether the
    configured signal file exists. For as long as it exists, every check writes
    a new timestamped report (host name, Yoda version, CPU and memory figures
    and the current stack of every thread in the process) to the configured
    output directory.
    """

    def __init__(self, config: "flask.config.Config"):
        """Create the monitor thread; call ``start()`` to begin polling.

        :param config: Portal configuration. Only its ``get()`` method is used.
        """
        # The annotation above is quoted so that it is not evaluated when the
        # class is defined.
        self.interval = 1
        self.config = config
        Timer.__init__(self, self.interval, self.record_info_if_needed)
        # The polling loop only ends when cancel() is called. Mark the thread
        # as a daemon so that it can never keep the process alive at shutdown.
        self.daemon = True

    def get_signal_file(self) -> str:
        """Return the path of the file whose presence triggers report writing."""
        return self.config.get("MONITOR_SIGNAL_FILE", "/var/www/yoda/show-tech.sig")

    def get_output_dir(self) -> str:
        """Return the directory in which report files are created."""
        return self.config.get("MONITOR_OUTPUT_DIR", "/tmp")

    def get_yoda_version(self) -> str:
        """Return the configured Yoda version and commit as ``"version (commit)"``."""
        return "{} ({})".format(self.config.get("YODA_VERSION", "N/A"),
                                self.config.get("YODA_COMMIT", "N/A"))

    def record_info_if_needed(self):
        """Poll for the signal file and write a report each time it is present.

        Runs until the timer is cancelled. Any exception raised while checking
        or writing is printed and the loop continues, so that a single failure
        does not end the monitoring thread.
        """
        while not self.finished.wait(self.interval):
            try:
                if os.path.isfile(self.get_signal_file()):
                    # Collect the report before creating the file, so that a
                    # failure during collection does not leave an empty file.
                    tshoot_info = self.get_tshoot_info().getvalue()
                    output_file = os.path.join(self.get_output_dir(),
                                               datetime.now().strftime("yoda-portal-showtech-%d-%m-%Y-%H-%M-%S-%f.txt"))
                    # Stack traces include source lines, which may contain
                    # non-ASCII characters; do not depend on the locale encoding.
                    with open(output_file, "w", encoding="utf-8") as output:
                        output.write(tshoot_info)
            except Exception as e:
                print("Exception occurred in monitoring thread: {} ({})".format(str(e), str(type(e))))

    @staticmethod
    def _natural_size_or_na(stats, field: str) -> str:
        """Format ``stats.<field>`` as a human-readable size, or "N/A" if absent.

        Some psutil memory fields only exist on certain platforms.
        """
        value = getattr(stats, field, None)
        if value is None:
            return "N/A"
        return humanize.naturalsize(value)

    def get_tshoot_info(self) -> StringIO:
        """Collect the tech support report.

        :returns: Buffer holding the report text: a header with host name,
                  timestamp and Yoda version, system and process memory
                  figures, and the stack of every thread in this process.
        """
        output = StringIO()
        date_string = datetime.now().strftime("%d/%m/%Y at %H:%M:%S.%f")
        hostname = socket.getfqdn()
        yoda_version = self.get_yoda_version()
        output.write(f"Portal tech support info for {hostname}, collected on {date_string}\n")
        output.write(f"Yoda version, as per portal config: {yoda_version}\n\n")

        # NOTE(review): without an interval, psutil documents cpu_percent() as
        # measuring since the previous call - confirm this is the intended figure.
        cpu_percent = str(psutil.cpu_percent()) + "%"
        # Take one snapshot of each so that the reported figures are consistent.
        virtual_mem = psutil.virtual_memory()
        process_mem = psutil.Process().memory_info()
        mem_total = humanize.naturalsize(virtual_mem.total)
        mem_available = humanize.naturalsize(virtual_mem.available)
        mem_buffers = self._natural_size_or_na(virtual_mem, "buffers")
        mem_cached = self._natural_size_or_na(virtual_mem, "cached")
        mem_rss = humanize.naturalsize(process_mem.rss)
        mem_vms = humanize.naturalsize(process_mem.vms)
        mem_shared = self._natural_size_or_na(process_mem, "shared")
        output.write(f"System-wide CPU percent: {cpu_percent}\n")
        output.write(f"Memory: global total: {mem_total}\n")
        output.write(f"Memory: global available: {mem_available}\n")
        output.write(f"Memory: global buffers: {mem_buffers}\n")
        output.write(f"Memory: global cached: {mem_cached}\n")
        output.write(f"Memory: process RSS: {mem_rss}\n")
        output.write(f"Memory: process VMS: {mem_vms}\n")
        output.write(f"Memory: process shared: {mem_shared}\n")

        output.write("\n")

        # Dump the current call stack of every thread in the process.
        for thread_id, stack in sys._current_frames().items():
            output.write(f"Thread ID: {thread_id}\n")
            for filename, line_number, function_name, line in traceback.extract_stack(stack):
                output.write(f" {filename}:{line_number} [{function_name}]\n")
                if line:
                    output.write(f" {line}\n")
            output.write("\n")

        return output
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ irods-avu-json==2.2.0
MarkupSafe==2.1.5
python-magic==0.4.27
Werkzeug==3.0.3
psutil==6.0.0
humanize==4.10.0
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import-order-style=smarkets
strictness=short
docstring_style=sphinx
max-line-length=127
application-import-names=admin,api,connman,errors,fileviewer,general,group_manager,research,search,open_search,stats,user,vault,deposit,intake,datarequest,util
application-import-names=admin,api,connman,errors,fileviewer,general,group_manager,monitor,research,search,open_search,stats,user,vault,deposit,intake,datarequest,util
exclude=venv

[mypy]
Expand Down
21 changes: 21 additions & 0 deletions unit-tests/test_monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
"""Integration tests for the monitoring thread."""

__copyright__ = 'Copyright (c) 2023-2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'

import sys
from unittest import TestCase

sys.path.append("..")

from monitor import Monitor


class MonitorTest(TestCase):
    """Checks on the tech support report produced by the monitor."""

    def test_can_get_tshoot_info(self) -> None:
        """The report contains the configured version and a thread listing."""
        report = Monitor({"YODA_VERSION": "test_version"}).get_tshoot_info().getvalue()
        for expected_text in ("test_version", "Thread ID"):
            self.assertIn(expected_text, report)
2 changes: 2 additions & 0 deletions unit-tests/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

from unittest import makeSuite, TestSuite

from test_monitor import MonitorTest
from test_util import UtilTest


def suite() -> TestSuite:
    """Build a test suite containing the MonitorTest and UtilTest cases.

    :returns: Suite with one sub-suite per test case class
    """
    # NOTE(review): unittest.makeSuite is deprecated since Python 3.11 and
    # removed in 3.13; TestLoader().loadTestsFromTestCase is the documented
    # replacement. Changing it also requires updating this file's imports.
    combined = TestSuite()
    for test_case in (MonitorTest, UtilTest):
        combined.addTest(makeSuite(test_case))
    return combined

0 comments on commit 400bf4f

Please sign in to comment.