diff --git a/README.md b/README.md
index d739c7a..1701a8c 100644
--- a/README.md
+++ b/README.md
@@ -135,6 +135,7 @@ Check Options:
   --ignore-service NAME
                         Ignore service NAME in checks
   --ignore-disk NAME    Ignore disk NAME in health check
+  --ignore-pools NAME   Ignore pool(s) in backup check (pool names, csv, e.g. 'ignore' or 'test,nobackup')
   -w THRESHOLD_WARNING, --warning THRESHOLD_WARNING
                         Warning threshold for check value. Mutiple thresholds with name:value,name:value
   -c THRESHOLD_CRITICAL, --critical THRESHOLD_CRITICAL
@@ -258,7 +259,7 @@ WARNING - Ceph Cluster is in warning state
 
 **Check ZFS pool health**
 ```
-./check_pve.py -u -p -e -m zfs-health -n pve
+./check_pve.py -u -p -e -m zfs-health -n pve
 OK - All ZFS pools are healthy
 ```
 
diff --git a/check_pve.py b/check_pve.py
index 321b2e7..a9149a7 100755
--- a/check_pve.py
+++ b/check_pve.py
@@ -666,6 +666,16 @@ def check_version(self) -> None:
 
     def check_vzdump_backup(self, name: Optional[str] = None) -> None:
         """Check for failed vzdump backup jobs."""
+        def get_ignored_pool_members(ignored_pool: str) -> list:
+            """Return the vmids (as strings) of the guests in pool ignored_pool."""
+            data = self.request(self.get_url(f"pools/{ignored_pool}"))
+            # Pools can also hold storage members, which have no "vmid" key.
+            return [str(m["vmid"]) for m in data["members"] if "vmid" in m]
+
+        def get_poolids() -> list:
+            """Return the ids of all pools reported by the API."""
+            return [pool["poolid"] for pool in self.request(self.get_url("pools"))]
+
         tasks_url = self.get_url("cluster/tasks")
         tasks = self.request(tasks_url)
         tasks = [t for t in tasks if t["type"] == "vzdump"]
@@ -697,12 +707,30 @@ def check_vzdump_backup(self, name: Optional[str] = None) -> None:
         nbu_url = self.get_url("cluster/backup-info/not-backed-up")
         not_backed_up = self.request(nbu_url)
         if len(not_backed_up) > 0:
-            guest_ids = " ".join([str(guest["vmid"]) for guest in not_backed_up])
-            if self.check_result not in [CheckState.CRITICAL, CheckState.UNKNOWN]:
-                self.check_result = CheckState.WARNING
-                self.check_message += (
-                    f"\nThere are guests not covered by any backup schedule: {guest_ids}"
-                )
+            # --ignore-pools may be repeated and each value may be a comma-separated
+            # list; without the option it is [] and nothing is filtered out.
+            ignored_pools = set()
+            for value in self.options.ignore_pools:
+                ignored_pools.update(p.strip() for p in value.split(",") if p.strip())
+            ignored_vmids = set()
+            if ignored_pools:
+                # Query only pools that exist; unknown names are skipped silently.
+                for poolid in get_poolids():
+                    if poolid in ignored_pools:
+                        ignored_vmids.update(get_ignored_pool_members(poolid))
+            # Keep the order reported by the API so the output is deterministic.
+            remaining_not_backed_up = [
+                str(guest["vmid"])
+                for guest in not_backed_up
+                if str(guest["vmid"]) not in ignored_vmids
+            ]
+            if remaining_not_backed_up:
+                if self.check_result not in [CheckState.CRITICAL, CheckState.UNKNOWN]:
+                    self.check_result = CheckState.WARNING
+                    self.check_message += (
+                        "\nThere are unignored guests not covered by any backup schedule: "
+                        + " ".join(remaining_not_backed_up)
+                    )
 
     def check_memory(self) -> None:
         """Check memory usage of Proxmox VE node."""
@@ -999,6 +1027,15 @@ def parse_args(self) -> None:
             default=[],
         )
 
+        check_opts.add_argument(
+            "--ignore-pools",
+            dest="ignore_pools",
+            action="append",
+            metavar="NAME",
+            help="Ignore VMs and containers in pool(s) NAME in backup check (comma-separated, may be repeated)",
+            default=[],
+        )
+
         check_opts.add_argument(
             "-w",
             "--warning",