diff --git a/lib/charms/alertmanager_k8s/v0/alertmanager_dispatch.py b/lib/charms/alertmanager_k8s/v1/alertmanager_dispatch.py similarity index 80% rename from lib/charms/alertmanager_k8s/v0/alertmanager_dispatch.py rename to lib/charms/alertmanager_k8s/v1/alertmanager_dispatch.py index 233c3535..de726242 100644 --- a/lib/charms/alertmanager_k8s/v0/alertmanager_dispatch.py +++ b/lib/charms/alertmanager_k8s/v1/alertmanager_dispatch.py @@ -25,11 +25,11 @@ def __init__(self, *args): ``` """ import logging -import socket -from typing import Callable, List, Optional, Set +from typing import Dict, Optional, Set from urllib.parse import urlparse import ops +import pydantic from ops.charm import CharmBase, RelationEvent, RelationJoinedEvent, RelationRole from ops.framework import EventBase, EventSource, Object, ObjectEvents from ops.model import Relation @@ -38,11 +38,13 @@ def __init__(self, *args): LIBID = "37f1ca6f8fe84e3092ebbf6dc2885310" # Increment this major API version when introducing breaking changes -LIBAPI = 0 +LIBAPI = 1 # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 8 +LIBPATCH = 0 + +PYDEPS = ["pydantic"] # Set to match metadata.yaml INTERFACE_NAME = "alertmanager_dispatch" @@ -50,6 +52,34 @@ def __init__(self, *args): logger = logging.getLogger(__name__) +class _ProviderSchemaV0(pydantic.BaseModel): + # Currently, the provider splits the URL and the consumer merges. That's why we switched to v1. + public_address: str + scheme: str = "http" + + +class _ProviderSchemaV1(pydantic.BaseModel): + url: str + + # The following v0 fields continue to be populated for backwards compatibility. + # TODO: when we switch to pydantic 2+, use computed_field instead of the following fields, and + # also drop the __init__. 
+ # https://docs.pydantic.dev/latest/api/fields/#pydantic.fields.computed_field + public_address: Optional[str] # v0 relic + scheme: Optional[str] # v0 relic + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + parsed = urlparse(kwargs["url"]) + port = ":" + str(parsed.port) if parsed.port else "" + public_address = f"{parsed.hostname}{port}{parsed.path}" + + # Derive v0 fields from v1 field + self.public_address = public_address + self.scheme = parsed.scheme + + class ClusterChanged(EventBase): """Event raised when an alertmanager cluster is changed. @@ -195,33 +225,27 @@ def _on_relation_changed(self, event: ops.charm.RelationChangedEvent): # inform consumer about the change self.on.cluster_changed.emit() # pyright: ignore - def get_cluster_info(self) -> List[str]: - """Returns a list of addresses of all the alertmanager units.""" + def get_cluster_info(self) -> Set[str]: + """Returns a set of URLs of all alertmanager units.""" if not (relation := self.charm.model.get_relation(self.name)): - return [] - - alertmanagers: List[str] = [] - for unit in relation.units: - address = relation.data[unit].get("public_address") - if address: - alertmanagers.append(address) - return sorted(alertmanagers) - - def get_cluster_info_with_scheme(self) -> List[str]: - """Returns a list of URLs of all the alertmanager units.""" - # FIXME: in v1 of the lib: - # - use a dict {"url": ...} so it's extendable - # - change return value to Set[str] - if not (relation := self.charm.model.get_relation(self.name)): - return [] + return set() alertmanagers: Set[str] = set() for unit in relation.units: - address = relation.data[unit].get("public_address") - scheme = relation.data[unit].get("scheme", "http") - if address: - alertmanagers.add(f"{scheme}://{address}") - return sorted(alertmanagers) + if rel_data := relation.data[unit]: + try: # v1 + data = _ProviderSchemaV1(**rel_data) + except pydantic.ValidationError as ev1: + try: # v0 + data = _ProviderSchemaV0(**rel_data) + 
except pydantic.ValidationError as ev0: + logger.warning("Relation data failed validation for v1: %s", ev1) + logger.warning("Relation data failed validation for v0: %s", ev0) + else: + alertmanagers.add(f"{data.scheme}://{data.public_address}") + else: + alertmanagers.add(data.url) + return alertmanagers def _on_relation_departed(self, _): """This hook notifies the charm that there may have been changes to the cluster.""" @@ -266,29 +290,20 @@ class AlertmanagerProvider(RelationManagerBase): This provider auto-registers relation events on behalf of the main Alertmanager charm. Arguments: - charm (CharmBase): consumer charm - relation_name (str): relation name (not interface name) - api_port (int): alertmanager server's api port; this is needed here to avoid accessing - charm constructs directly - - Attributes: - charm (CharmBase): the Alertmanager charm + charm: the Alertmanager charm + external_url: URL for this unit's workload API endpoint + relation_name: relation name (not interface name) """ def __init__( self, - charm, - relation_name: str = "alerting", - api_port: int = 9093, # TODO: breaking change: drop this arg + charm: CharmBase, *, - external_url: Optional[Callable] = None, # TODO: breaking change: make this mandatory + external_url: str, + relation_name: str = "alerting", ): - # TODO: breaking change: force keyword-only args from relation_name onwards super().__init__(charm, relation_name, RelationRole.provides) - - # We don't need to worry about the literal "http" here because the external_url arg is set - # by the charm. TODO: drop it after external_url becomes a mandatory arg. - self._external_url = external_url or (lambda: f"http://{socket.getfqdn()}:{api_port}") + self._external_url = external_url events = self.charm.on[self.name] @@ -302,9 +317,9 @@ def _on_relation_joined(self, event: RelationJoinedEvent): This is needed for consumers such as prometheus, which should be aware of all alertmanager instances. 
""" - self.update_relation_data(event) + self._update_relation_data(event) - def _generate_relation_data(self, relation: Relation): + def _generate_relation_data(self, relation: Relation) -> Dict[str, str]: """Helper function to generate relation data in the correct format. Addresses are without scheme. @@ -314,13 +329,10 @@ def _generate_relation_data(self, relation: Relation): # deduplicate so that the config file only has one entry, but ideally the # "alertmanagers.[].static_configs.targets" section in the prometheus config should list # all units. - parsed = urlparse(self._external_url()) - return { - "public_address": f"{parsed.hostname}:{parsed.port or 80}{parsed.path}", - "scheme": parsed.scheme, - } + data = _ProviderSchemaV1(url=self._external_url) + return data.dict() - def update_relation_data(self, event: Optional[RelationEvent] = None): + def _update_relation_data(self, event: Optional[RelationEvent] = None): """Helper function for updating relation data bags. This function can be used in two different ways: @@ -346,3 +358,8 @@ def update_relation_data(self, event: Optional[RelationEvent] = None): event.relation.data[self.charm.unit].update( self._generate_relation_data(event.relation) ) + + def update(self, *, external_url: str): + """Update data pertaining to this relation manager (similar args to __init__).""" + self._external_url = external_url + self._update_relation_data() diff --git a/src/alertmanager.py b/src/alertmanager.py index 5aabeca1..3aa29c45 100644 --- a/src/alertmanager.py +++ b/src/alertmanager.py @@ -5,6 +5,7 @@ """Workload manager for alertmanaqger.""" import logging +import os import re from typing import Callable, Dict, List, Optional, Tuple @@ -193,6 +194,13 @@ def _command(): f"{peer_cmd_args}" ) + def _environment(): + return { + "https_proxy": os.environ.get("JUJU_CHARM_HTTPS_PROXY", ""), + "http_proxy": os.environ.get("JUJU_CHARM_HTTP_PROXY", ""), + "no_proxy": os.environ.get("JUJU_CHARM_NO_PROXY", ""), + } + return Layer( { 
"summary": "alertmanager layer", @@ -203,6 +211,7 @@ def _command(): "summary": "alertmanager service", "command": _command(), "startup": "enabled", + "environment": _environment(), } }, } diff --git a/src/charm.py b/src/charm.py index 2bd78fe5..04816132 100755 --- a/src/charm.py +++ b/src/charm.py @@ -19,10 +19,10 @@ WorkloadManagerError, ) from alertmanager_client import Alertmanager, AlertmanagerBadResponse -from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerProvider from charms.alertmanager_k8s.v0.alertmanager_remote_configuration import ( RemoteConfigurationRequirer, ) +from charms.alertmanager_k8s.v1.alertmanager_dispatch import AlertmanagerProvider from charms.catalogue_k8s.v0.catalogue import CatalogueConsumer, CatalogueItem from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider from charms.grafana_k8s.v0.grafana_source import GrafanaSourceProvider @@ -105,17 +105,10 @@ def __init__(self, *args): self.framework.observe(self.ingress.on.ready, self._handle_ingress) # pyright: ignore self.framework.observe(self.ingress.on.revoked, self._handle_ingress) # pyright: ignore - # The `_external_url` property is passed as a callable so that the charm library code - # always uses up-to-date context. - # This arg is needed because in case of a custom event (e.g. ingress ready) or a re-emit, - # the charm won't be re-initialized with an updated external url. - # Also, coincidentally, unit tests would otherwise fail because harness doesn't - # reinitialize the charm between core events. 
self.alertmanager_provider = AlertmanagerProvider( self, - self._relations.alerting, - self._ports.api, - external_url=lambda: AlertmanagerCharm._external_url.fget(self), # type: ignore + relation_name=self._relations.alerting, + external_url=self._internal_url, # TODO See 'TODO' below, about external_url ) self.api = Alertmanager(endpoint_url=self._external_url) @@ -418,7 +411,11 @@ def _common_exit_hook(self, update_ca_certs: bool = False) -> None: ) return - self.alertmanager_provider.update_relation_data() + # TODO Conditionally update with the external URL if it's a CMR, or rely on "recv-ca-cert" + # on the prometheus side. + # - https://github.com/canonical/operator/issues/970 + # - https://github.com/canonical/prometheus-k8s-operator/issues/530, + self.alertmanager_provider.update(external_url=self._internal_url) self.ingress.provide_ingress_requirements( scheme=urlparse(self._internal_url).scheme, port=self.api_port diff --git a/tests/manual/bundle_1_e2e_tls.yaml b/tests/manual/bundle_1_e2e_tls.yaml index 0e974171..71a097fd 100644 --- a/tests/manual/bundle_1_e2e_tls.yaml +++ b/tests/manual/bundle_1_e2e_tls.yaml @@ -12,6 +12,16 @@ applications: channel: edge scale: 1 trust: true + avalanche: + # The avalanche charm has always-firing alerts that can be used to verify prometheus is able to + # post alerts to alertmanager. 
+ charm: avalanche-k8s + channel: edge + scale: 1 + trust: true + options: + metric_count: 10 + series_count: 2 local-ca: charm: self-signed-certificates channel: edge @@ -43,3 +53,5 @@ relations: - prometheus:ingress - - alertmanager:self-metrics-endpoint - prometheus:metrics-endpoint +- - avalanche:metrics-endpoint + - prometheus:metrics-endpoint diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 34dbe040..18375ade 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -73,6 +73,7 @@ def test_relation_data_provides_public_address(self): assert rel is not None # for static checker expected_address = "fqdn:{}".format(self.harness.charm.api_port) expected_rel_data = { + "url": "http://fqdn:9093", "public_address": expected_address, "scheme": "http", } diff --git a/tests/unit/test_consumer.py b/tests/unit/test_consumer.py index eedfe2ba..83bfab4f 100644 --- a/tests/unit/test_consumer.py +++ b/tests/unit/test_consumer.py @@ -6,7 +6,7 @@ import unittest import ops -from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerConsumer +from charms.alertmanager_k8s.v1.alertmanager_dispatch import AlertmanagerConsumer from ops.charm import CharmBase from ops.framework import StoredState from ops.testing import Harness @@ -79,7 +79,7 @@ def _add_alertmanager_units(self, rel_id: int, num_units: int, start_with=0): def test_cluster_updated_after_alertmanager_units_join(self): # before - self.assertEqual([], self.harness.charm.alertmanager_lib.get_cluster_info()) + self.assertEqual(set(), self.harness.charm.alertmanager_lib.get_cluster_info()) num_events = self.harness.charm._stored.cluster_changed_emitted # add relation @@ -88,8 +88,9 @@ def test_cluster_updated_after_alertmanager_units_join(self): # after self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) - self.assertListEqual( - ["10.20.30.0", "10.20.30.1"], self.harness.charm.alertmanager_lib.get_cluster_info() + self.assertSetEqual( + 
{"http://10.20.30.0", "http://10.20.30.1"}, + self.harness.charm.alertmanager_lib.get_cluster_info(), ) num_events = self.harness.charm._stored.cluster_changed_emitted @@ -97,8 +98,8 @@ def test_cluster_updated_after_alertmanager_units_join(self): # add another unit self._add_alertmanager_units(rel_id, num_units=1, start_with=2) self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) - self.assertListEqual( - ["10.20.30.0", "10.20.30.1", "10.20.30.2"], + self.assertSetEqual( + {"http://10.20.30.0", "http://10.20.30.1", "http://10.20.30.2"}, self.harness.charm.alertmanager_lib.get_cluster_info(), ) @@ -119,7 +120,7 @@ def test_cluster_updated_after_alertmanager_unit_leaves(self): self.harness.remove_relation_unit(rel_id, "am/2") self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) after = self.harness.charm.alertmanager_lib.get_cluster_info() - self.assertListEqual(after, ["10.20.30.0", "10.20.30.1"]) + self.assertSetEqual(after, {"http://10.20.30.0", "http://10.20.30.1"}) num_events = self.harness.charm._stored.cluster_changed_emitted @@ -129,7 +130,7 @@ def test_cluster_updated_after_alertmanager_unit_leaves(self): self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) after = self.harness.charm.alertmanager_lib.get_cluster_info() self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) - self.assertListEqual(after, []) + self.assertSetEqual(after, set()) def test_cluster_is_empty_after_relation_breaks(self): # add relation @@ -144,7 +145,7 @@ def test_cluster_is_empty_after_relation_breaks(self): self.harness.remove_relation(rel_id) after = self.harness.charm.alertmanager_lib.get_cluster_info() self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) - self.assertListEqual([], after) + self.assertSetEqual(set(), after) def test_relation_changed(self): # add relation @@ -153,6 +154,7 @@ def test_relation_changed(self): # update 
remote unit's relation data (emulates upgrade-charm) self.harness.update_relation_data(rel_id, "am/1", {"public_address": "90.80.70.60"}) - self.assertListEqual( - ["10.20.30.0", "90.80.70.60"], self.harness.charm.alertmanager_lib.get_cluster_info() + self.assertSetEqual( + {"http://10.20.30.0", "http://90.80.70.60"}, + self.harness.charm.alertmanager_lib.get_cluster_info(), ) diff --git a/tox.ini b/tox.ini index 66a2f139..8cbb4fba 100644 --- a/tox.ini +++ b/tox.ini @@ -55,6 +55,7 @@ deps = pyright charm: -r{toxinidir}/requirements.txt lib: ops + pydantic < 2.0 # from alertmanager_k8s.v1.alertmanager_dispatch setenv = PYRIGHT_PYTHON_FORCE_VERSION = latest commands =