Skip to content

Commit

Permalink
Always forward to prometheus the fqdn URL, not ingress (#195)
Browse files Browse the repository at this point in the history
* Update lib to v1.0
* Pass proxy envvars (copied from #180)
* Improve get_cluster_info api
* Use pydantic
* Add avalanche to bundle_1_e2e_tls.yaml
  • Loading branch information
sed-i authored Sep 27, 2023
1 parent 99dc29b commit 7e95796
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ def __init__(self, *args):
```
"""
import logging
import socket
from typing import Callable, List, Optional, Set
from typing import Dict, Optional, Set
from urllib.parse import urlparse

import ops
import pydantic
from ops.charm import CharmBase, RelationEvent, RelationJoinedEvent, RelationRole
from ops.framework import EventBase, EventSource, Object, ObjectEvents
from ops.model import Relation
Expand All @@ -38,18 +38,48 @@ def __init__(self, *args):
LIBID = "37f1ca6f8fe84e3092ebbf6dc2885310"

# Increment this major API version when introducing breaking changes
LIBAPI = 0
LIBAPI = 1

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 8
LIBPATCH = 0

PYDEPS = ["pydantic"]

# Set to match metadata.yaml
INTERFACE_NAME = "alertmanager_dispatch"

logger = logging.getLogger(__name__)


class _ProviderSchemaV0(pydantic.BaseModel):
    """Relation data published by the provider in v0 of this library.

    In v0 the provider splits its URL into an address and a scheme, and the consumer merges
    them back together. That is why the schema was switched to v1.
    """

    # Address part of the URL; the consumer prepends "<scheme>://" to it.
    public_address: str
    scheme: str = "http"


class _ProviderSchemaV1(pydantic.BaseModel):
    """Relation data published by the provider in v1 of this library: one complete URL.

    The v0 fields (``public_address`` and ``scheme``) are derived from ``url`` on construction
    and overwrite any values passed in for them, so consumers that still read the v0 fields
    keep working.
    """

    url: str

    # The following are v0 fields that continue to be populated for backwards compatibility.
    # TODO: when we switch to pydantic 2+, use computed_field instead of the following fields, and
    #  also drop the __init__.
    #  https://docs.pydantic.dev/latest/api/fields/#pydantic.fields.computed_field
    public_address: Optional[str]  # v0 relic
    scheme: Optional[str]  # v0 relic

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Parse the validated field rather than the raw kwarg, so we always work on the
        # value that pydantic accepted (and coerced to str).
        parsed = urlparse(self.url)

        host = parsed.hostname
        if host and ":" in host:
            # urlparse strips the square brackets from IPv6 literals. Restore them, otherwise
            # "<host>:<port>" is ambiguous and a v0 consumer cannot rebuild a valid URL.
            host = f"[{host}]"
        port = f":{parsed.port}" if parsed.port else ""

        # Derive v0 fields from v1 field
        self.public_address = f"{host}{port}{parsed.path}"
        self.scheme = parsed.scheme


class ClusterChanged(EventBase):
"""Event raised when an alertmanager cluster is changed.
Expand Down Expand Up @@ -195,33 +225,27 @@ def _on_relation_changed(self, event: ops.charm.RelationChangedEvent):
# inform consumer about the change
self.on.cluster_changed.emit() # pyright: ignore

def get_cluster_info(self) -> List[str]:
"""Returns a list of addresses of all the alertmanager units."""
def get_cluster_info(self) -> Set[str]:
"""Returns a set of URLs of all alertmanager units."""
if not (relation := self.charm.model.get_relation(self.name)):
return []

alertmanagers: List[str] = []
for unit in relation.units:
address = relation.data[unit].get("public_address")
if address:
alertmanagers.append(address)
return sorted(alertmanagers)

def get_cluster_info_with_scheme(self) -> List[str]:
"""Returns a list of URLs of all the alertmanager units."""
# FIXME: in v1 of the lib:
# - use a dict {"url": ...} so it's extendable
# - change return value to Set[str]
if not (relation := self.charm.model.get_relation(self.name)):
return []
return set()

alertmanagers: Set[str] = set()
for unit in relation.units:
address = relation.data[unit].get("public_address")
scheme = relation.data[unit].get("scheme", "http")
if address:
alertmanagers.add(f"{scheme}://{address}")
return sorted(alertmanagers)
if rel_data := relation.data[unit]:
try: # v1
data = _ProviderSchemaV1(**rel_data)
except pydantic.ValidationError as ev1:
try: # v0
data = _ProviderSchemaV0(**rel_data)
except pydantic.ValidationError as ev0:
logger.warning("Relation data failed validation for v1: %s", ev1)
logger.warning("Relation data failed validation for v0: %s", ev0)
else:
alertmanagers.add(f"{data.scheme}://{data.public_address}")
else:
alertmanagers.add(data.url)
return alertmanagers

def _on_relation_departed(self, _):
"""This hook notifies the charm that there may have been changes to the cluster."""
Expand Down Expand Up @@ -266,29 +290,20 @@ class AlertmanagerProvider(RelationManagerBase):
This provider auto-registers relation events on behalf of the main Alertmanager charm.
Arguments:
charm (CharmBase): consumer charm
relation_name (str): relation name (not interface name)
api_port (int): alertmanager server's api port; this is needed here to avoid accessing
charm constructs directly
Attributes:
charm (CharmBase): the Alertmanager charm
charm: the Alertmanager (provider) charm
external_url: URL for this unit's workload API endpoint
relation_name: relation name (not interface name)
"""

def __init__(
self,
charm,
relation_name: str = "alerting",
api_port: int = 9093, # TODO: breaking change: drop this arg
charm: CharmBase,
*,
external_url: Optional[Callable] = None, # TODO: breaking change: make this mandatory
external_url: str,
relation_name: str = "alerting",
):
# TODO: breaking change: force keyword-only args from relation_name onwards
super().__init__(charm, relation_name, RelationRole.provides)

# We don't need to worry about the literal "http" here because the external_url arg is set
# by the charm. TODO: drop it after external_url becomes a mandatory arg.
self._external_url = external_url or (lambda: f"http://{socket.getfqdn()}:{api_port}")
self._external_url = external_url

events = self.charm.on[self.name]

Expand All @@ -302,9 +317,9 @@ def _on_relation_joined(self, event: RelationJoinedEvent):
This is needed for consumers such as prometheus, which should be aware of all alertmanager
instances.
"""
self.update_relation_data(event)
self._update_relation_data(event)

def _generate_relation_data(self, relation: Relation):
def _generate_relation_data(self, relation: Relation) -> Dict[str, str]:
"""Helper function to generate relation data in the correct format.
Addresses are without scheme.
Expand All @@ -314,13 +329,10 @@ def _generate_relation_data(self, relation: Relation):
# deduplicate so that the config file only has one entry, but ideally the
# "alertmanagers.[].static_configs.targets" section in the prometheus config should list
# all units.
parsed = urlparse(self._external_url())
return {
"public_address": f"{parsed.hostname}:{parsed.port or 80}{parsed.path}",
"scheme": parsed.scheme,
}
data = _ProviderSchemaV1(url=self._external_url)
return data.dict()

def update_relation_data(self, event: Optional[RelationEvent] = None):
def _update_relation_data(self, event: Optional[RelationEvent] = None):
"""Helper function for updating relation data bags.
This function can be used in two different ways:
Expand All @@ -346,3 +358,8 @@ def update_relation_data(self, event: Optional[RelationEvent] = None):
event.relation.data[self.charm.unit].update(
self._generate_relation_data(event.relation)
)

def update(self, *, external_url: str):
    """Store a new URL on this relation manager and refresh the relation data bags.

    Takes the same keyword argument as ``__init__``.

    Args:
        external_url: URL for this unit's workload API endpoint.
    """
    # The URL must be stored first: the relation data is generated from it.
    self._external_url = external_url
    self._update_relation_data()
9 changes: 9 additions & 0 deletions src/alertmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""Workload manager for alertmanager."""

import logging
import os
import re
from typing import Callable, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -193,6 +194,13 @@ def _command():
f"{peer_cmd_args}"
)

def _environment():
    """Return the proxy environment variables for the alertmanager service.

    Each value is copied from the matching ``JUJU_CHARM_*`` variable of the charm's own
    environment; a variable that is not set is passed on as an empty string.
    """
    # Workload variable name -> charm environment variable it is read from.
    sources = {
        "https_proxy": "JUJU_CHARM_HTTPS_PROXY",
        "http_proxy": "JUJU_CHARM_HTTP_PROXY",
        "no_proxy": "JUJU_CHARM_NO_PROXY",
    }
    return {target: os.environ.get(juju_var, "") for target, juju_var in sources.items()}

return Layer(
{
"summary": "alertmanager layer",
Expand All @@ -203,6 +211,7 @@ def _command():
"summary": "alertmanager service",
"command": _command(),
"startup": "enabled",
"environment": _environment(),
}
},
}
Expand Down
19 changes: 8 additions & 11 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
WorkloadManagerError,
)
from alertmanager_client import Alertmanager, AlertmanagerBadResponse
from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerProvider
from charms.alertmanager_k8s.v0.alertmanager_remote_configuration import (
RemoteConfigurationRequirer,
)
from charms.alertmanager_k8s.v1.alertmanager_dispatch import AlertmanagerProvider
from charms.catalogue_k8s.v0.catalogue import CatalogueConsumer, CatalogueItem
from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider
from charms.grafana_k8s.v0.grafana_source import GrafanaSourceProvider
Expand Down Expand Up @@ -105,17 +105,10 @@ def __init__(self, *args):
self.framework.observe(self.ingress.on.ready, self._handle_ingress) # pyright: ignore
self.framework.observe(self.ingress.on.revoked, self._handle_ingress) # pyright: ignore

# The `_external_url` property is passed as a callable so that the charm library code
# always uses up-to-date context.
# This arg is needed because in case of a custom event (e.g. ingress ready) or a re-emit,
# the charm won't be re-initialized with an updated external url.
# Also, coincidentally, unit tests would otherwise fail because harness doesn't
# reinitialize the charm between core events.
self.alertmanager_provider = AlertmanagerProvider(
self,
self._relations.alerting,
self._ports.api,
external_url=lambda: AlertmanagerCharm._external_url.fget(self), # type: ignore
relation_name=self._relations.alerting,
external_url=self._internal_url, # TODO See 'TODO' below, about external_url
)

self.api = Alertmanager(endpoint_url=self._external_url)
Expand Down Expand Up @@ -418,7 +411,11 @@ def _common_exit_hook(self, update_ca_certs: bool = False) -> None:
)
return

self.alertmanager_provider.update_relation_data()
# TODO Conditionally update with the external URL if it's a CMR, or rely on "recv-ca-cert"
# on the prometheus side.
# - https://github.com/canonical/operator/issues/970
# - https://github.com/canonical/prometheus-k8s-operator/issues/530,
self.alertmanager_provider.update(external_url=self._internal_url)

self.ingress.provide_ingress_requirements(
scheme=urlparse(self._internal_url).scheme, port=self.api_port
Expand Down
12 changes: 12 additions & 0 deletions tests/manual/bundle_1_e2e_tls.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@ applications:
channel: edge
scale: 1
trust: true
avalanche:
# The avalanche charm has always-firing alerts that can be used to verify prometheus is able to
# post alerts to alertmanager.
charm: avalanche-k8s
channel: edge
scale: 1
trust: true
options:
metric_count: 10
series_count: 2
local-ca:
charm: self-signed-certificates
channel: edge
Expand Down Expand Up @@ -43,3 +53,5 @@ relations:
- prometheus:ingress
- - alertmanager:self-metrics-endpoint
- prometheus:metrics-endpoint
- - avalanche:metrics-endpoint
- prometheus:metrics-endpoint
1 change: 1 addition & 0 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def test_relation_data_provides_public_address(self):
assert rel is not None # for static checker
expected_address = "fqdn:{}".format(self.harness.charm.api_port)
expected_rel_data = {
"url": "http://fqdn:9093",
"public_address": expected_address,
"scheme": "http",
}
Expand Down
24 changes: 13 additions & 11 deletions tests/unit/test_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import unittest

import ops
from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerConsumer
from charms.alertmanager_k8s.v1.alertmanager_dispatch import AlertmanagerConsumer
from ops.charm import CharmBase
from ops.framework import StoredState
from ops.testing import Harness
Expand Down Expand Up @@ -79,7 +79,7 @@ def _add_alertmanager_units(self, rel_id: int, num_units: int, start_with=0):

def test_cluster_updated_after_alertmanager_units_join(self):
# before
self.assertEqual([], self.harness.charm.alertmanager_lib.get_cluster_info())
self.assertEqual(set(), self.harness.charm.alertmanager_lib.get_cluster_info())
num_events = self.harness.charm._stored.cluster_changed_emitted

# add relation
Expand All @@ -88,17 +88,18 @@ def test_cluster_updated_after_alertmanager_units_join(self):

# after
self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
self.assertListEqual(
["10.20.30.0", "10.20.30.1"], self.harness.charm.alertmanager_lib.get_cluster_info()
self.assertSetEqual(
{"http://10.20.30.0", "http://10.20.30.1"},
self.harness.charm.alertmanager_lib.get_cluster_info(),
)

num_events = self.harness.charm._stored.cluster_changed_emitted

# add another unit
self._add_alertmanager_units(rel_id, num_units=1, start_with=2)
self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
self.assertListEqual(
["10.20.30.0", "10.20.30.1", "10.20.30.2"],
self.assertSetEqual(
{"http://10.20.30.0", "http://10.20.30.1", "http://10.20.30.2"},
self.harness.charm.alertmanager_lib.get_cluster_info(),
)

Expand All @@ -119,7 +120,7 @@ def test_cluster_updated_after_alertmanager_unit_leaves(self):
self.harness.remove_relation_unit(rel_id, "am/2")
self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
after = self.harness.charm.alertmanager_lib.get_cluster_info()
self.assertListEqual(after, ["10.20.30.0", "10.20.30.1"])
self.assertSetEqual(after, {"http://10.20.30.0", "http://10.20.30.1"})

num_events = self.harness.charm._stored.cluster_changed_emitted

Expand All @@ -129,7 +130,7 @@ def test_cluster_updated_after_alertmanager_unit_leaves(self):
self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
after = self.harness.charm.alertmanager_lib.get_cluster_info()
self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
self.assertListEqual(after, [])
self.assertSetEqual(after, set())

def test_cluster_is_empty_after_relation_breaks(self):
# add relation
Expand All @@ -144,7 +145,7 @@ def test_cluster_is_empty_after_relation_breaks(self):
self.harness.remove_relation(rel_id)
after = self.harness.charm.alertmanager_lib.get_cluster_info()
self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
self.assertListEqual([], after)
self.assertSetEqual(set(), after)

def test_relation_changed(self):
# add relation
Expand All @@ -153,6 +154,7 @@ def test_relation_changed(self):

# update remote unit's relation data (emulates upgrade-charm)
self.harness.update_relation_data(rel_id, "am/1", {"public_address": "90.80.70.60"})
self.assertListEqual(
["10.20.30.0", "90.80.70.60"], self.harness.charm.alertmanager_lib.get_cluster_info()
self.assertSetEqual(
{"http://10.20.30.0", "http://90.80.70.60"},
self.harness.charm.alertmanager_lib.get_cluster_info(),
)
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ deps =
pyright
charm: -r{toxinidir}/requirements.txt
lib: ops
pydantic < 2.0 # from alertmanager_k8s.v1.alertmanager_dispatch
setenv =
PYRIGHT_PYTHON_FORCE_VERSION = latest
commands =
Expand Down

0 comments on commit 7e95796

Please sign in to comment.