From 43234f418b5d6d29cbd9695a64e208e2f70d0ef7 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Thu, 20 Feb 2025 07:16:20 +0100 Subject: [PATCH 01/14] feat: prometheus + graph generating capability --- holmes/core/openai_formatting.py | 49 + holmes/core/performance_timing.py | 33 +- holmes/core/tools.py | 39 +- .../prompts/generic_ask_conversation.jinja2 | 16 + holmes/plugins/toolsets/__init__.py | 6 +- holmes/plugins/toolsets/datetime.py | 33 + holmes/plugins/toolsets/prometheus.py | 418 +++ holmes/utils/cache.py | 84 + .../execute_prometheus_range_query.txt | 1270 ++++++++ .../get_current_time.txt | 2 + .../list_available_metrics.txt | 95 + .../test_case.yaml | 5 + .../execute_prometheus_range_query.txt | 801 +++++ .../get_current_time.txt | 2 + .../kubectl_find_resource.txt | 5 + .../list_available_metrics.txt | 4 + .../test_case.yaml | 5 + .../execute_prometheus_range_query.txt | 267 ++ ..._prometheus_range_query_with_namespace.txt | 267 ++ .../get_current_time.txt | 2 + .../kubectl_find_resource.txt | 5 + .../list_available_metrics.txt | 4 + .../test_case.yaml | 5 + .../execute_prometheus_range_query.txt | 125 + .../execute_prometheus_range_query_by_pod.txt | 127 + ..._prometheus_range_query_with_namespace.txt | 125 + ...heus_range_query_with_namespace_by_pod.txt | 267 ++ .../get_current_time.txt | 2 + .../kubectl_find_resource.txt | 5 + .../kubectl_get_by_name.txt | 6 + .../kubectl_lineage_children.txt | 23 + .../kubectl_top_pods.txt | 6 + .../list_available_metrics.txt | 4 + .../slow_oom_deployment.yaml | 32 + .../test_case.yaml | 13 + .../execute_prometheus_range_query.txt | 231 ++ .../get_current_time.txt | 2 + .../33_cpu_throttling_graph/helm/Dockerfile | 19 + .../33_cpu_throttling_graph/helm/app.py | 54 + .../33_cpu_throttling_graph/helm/build.sh | 1 + .../helm/manifest.yaml | 100 + .../helm/requirements.txt | 5 + .../kubectl_find_resource.txt | 5 + .../kubectl_top_pods.txt | 6 + .../list_available_metrics.txt | 4 + 
.../33_cpu_throttling_graph/test_case.yaml | 7 + .../execute_prometheus_range_query.txt | 1100 +++++++ .../execute_prometheus_range_query_2.txt | 2598 +++++++++++++++++ .../get_current_time.txt | 2 + .../34_http_latency_graph/helm/Dockerfile | 19 + .../34_http_latency_graph/helm/app.py | 79 + .../34_http_latency_graph/helm/build.sh | 1 + .../34_http_latency_graph/helm/manifest.yaml | 81 + .../helm/requirements.txt | 5 + .../list_available_metrics.txt | 36 + .../34_http_latency_graph/test_case.yaml | 8 + tests/llm/test_ask_holmes.py | 29 +- tests/llm/test_investigate.py | 27 +- tests/llm/utils/braintrust.py | 4 + tests/llm/utils/mock_toolset.py | 8 +- tests/test_prometheus.py | 136 + 61 files changed, 8651 insertions(+), 68 deletions(-) create mode 100644 holmes/core/openai_formatting.py create mode 100644 holmes/plugins/toolsets/datetime.py create mode 100644 holmes/plugins/toolsets/prometheus.py create mode 100644 holmes/utils/cache.py create mode 100644 tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/get_current_time.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/list_available_metrics.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/test_case.yaml create mode 100644 tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/get_current_time.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/kubectl_find_resource.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/list_available_metrics.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml create mode 100644 
tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/get_current_time.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/kubectl_find_resource.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/list_available_metrics.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/test_case.yaml create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_by_pod.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace_by_pod.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/get_current_time.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_find_resource.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_get_by_name.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_lineage_children.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_top_pods.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/list_available_metrics.txt create mode 100644 
tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/slow_oom_deployment.yaml create mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/test_case.yaml create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/execute_prometheus_range_query.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/get_current_time.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/Dockerfile create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/app.py create mode 100755 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/build.sh create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/manifest.yaml create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/requirements.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_find_resource.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_top_pods.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/list_available_metrics.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/test_case.yaml create mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query_2.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/get_current_time.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/Dockerfile create mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/app.py create mode 100755 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/build.sh create mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/manifest.yaml create mode 100644 
tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/requirements.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/list_available_metrics.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/test_case.yaml create mode 100644 tests/test_prometheus.py diff --git a/holmes/core/openai_formatting.py b/holmes/core/openai_formatting.py new file mode 100644 index 00000000..e9f68a04 --- /dev/null +++ b/holmes/core/openai_formatting.py @@ -0,0 +1,49 @@ +import re + +pattern = r"^(array\[(?P<inner_type>\w+)\])|(?P<simple_type>\w+)$" + + +def type_to_open_ai_schema(type_value): + match = re.match(pattern, type_value.strip()) + + if not match: + raise ValueError(f"Invalid type format: {type_value}") + + if match.group("inner_type"): + return {"type": "array", "items": {"type": match.group("inner_type")}} + + else: + return {"type": match.group("simple_type")} + + +def format_tool_to_open_ai_standard( + tool_name: str, tool_description: str, tool_parameters: dict +): + tool_properties = {} + for param_name, param_attributes in tool_parameters.items(): + tool_properties[param_name] = type_to_open_ai_schema(param_attributes.type) + if param_attributes.description is not None: + tool_properties[param_name]["description"] = param_attributes.description + + result = { + "type": "function", + "function": { + "name": tool_name, + "description": tool_description, + "parameters": { + "properties": tool_properties, + "required": [ + param_name + for param_name, param_attributes in tool_parameters.items() + if param_attributes.required + ], + "type": "object", + }, + }, + } + + # gemini doesnt have parameters object if it is without params + if tool_properties is None: + result["function"].pop("parameters") + + return result diff --git a/holmes/core/performance_timing.py b/holmes/core/performance_timing.py index 3c5b07b1..ad1a6807 100644 --- a/holmes/core/performance_timing.py +++ b/holmes/core/performance_timing.py @@ -49,14 +49,25 @@ def 
end(self): ) -def log_function_timing(func): - @wraps(func) - def function_timing_wrapper(*args, **kwargs): - start_time = time.perf_counter() - result = func(*args, **kwargs) - end_time = time.perf_counter() - total_time = int((end_time - start_time) * 1000) - logging.info(f'Function "{func.__name__}()" took {total_time}ms') - return result - - return function_timing_wrapper +def log_function_timing(label=None): + def decorator(func): + @wraps(func) + def function_timing_wrapper(*args, **kwargs): + start_time = time.perf_counter() + result = func(*args, **kwargs) + end_time = time.perf_counter() + total_time = int((end_time - start_time) * 1000) + + function_identifier = ( + f'"{label}: {func.__name__}()"' if label else f'"{func.__name__}()"' + ) + logging.info(f"Function {function_identifier} took {total_time}ms") + return result + + return function_timing_wrapper + + if callable(label): + func = label + label = None + return decorator(func) + return decorator diff --git a/holmes/core/tools.py b/holmes/core/tools.py index 578af072..af28dd0f 100644 --- a/holmes/core/tools.py +++ b/holmes/core/tools.py @@ -18,6 +18,8 @@ model_validator, ) +from holmes.core.openai_formatting import format_tool_to_open_ai_standard + ToolsetPattern = Union[Literal["*"], List[str]] @@ -80,36 +82,11 @@ class Tool(ABC, BaseModel): additional_instructions: Optional[str] = None def get_openai_format(self): - tool_properties = {} - for param_name, param_attributes in self.parameters.items(): - tool_properties[param_name] = {"type": param_attributes.type} - if param_attributes.description is not None: - tool_properties[param_name]["description"] = ( - param_attributes.description - ) - - result = { - "type": "function", - "function": { - "name": self.name, - "description": self.description, - "parameters": { - "properties": tool_properties, - "required": [ - param_name - for param_name, param_attributes in self.parameters.items() - if param_attributes.required - ], - "type": "object", - }, - 
}, - } - - # gemini doesnt have parameters object if it is without params - if tool_properties is None: - result["function"].pop("parameters") - - return result + return format_tool_to_open_ai_standard( + tool_name=self.name, + tool_description=self.description, + tool_parameters=self.parameters, + ) @abstractmethod def invoke(self, params: Dict) -> str: @@ -415,7 +392,7 @@ def invoke(self, tool_name: str, params: Dict) -> str: tool = self.get_tool_by_name(tool_name) return tool.invoke(params) if tool else "" - def get_tool_by_name(self, name: str) -> Optional[YAMLTool]: + def get_tool_by_name(self, name: str) -> Optional[Tool]: if name in self.tools_by_name: return self.tools_by_name[name] logging.warning(f"could not find tool {name}. skipping") diff --git a/holmes/plugins/prompts/generic_ask_conversation.jinja2 b/holmes/plugins/prompts/generic_ask_conversation.jinja2 index d78ef321..bc5c6425 100644 --- a/holmes/plugins/prompts/generic_ask_conversation.jinja2 +++ b/holmes/plugins/prompts/generic_ask_conversation.jinja2 @@ -8,6 +8,22 @@ Use conversation history to maintain continuity when appropriate, ensuring effic {% include '_general_instructions.jinja2' %} +Prometheus/PromQL queries +* Use prometheus to execute promql queries with the tools `execute_prometheus_instant_query` and `execute_prometheus_range_query` +* Use these tools to retroactively check metrics. 
Here are standard metrics but you can use different ones: +** For memory consumption: `container_memory_working_set_bytes` +** For CPU usage: `container_cpu_usage_seconds_total` +** For CPU throttling: `container_cpu_cfs_throttled_periods_total` +** For latencies, prefer using `_sum` / `_count` over a sliding window +** Avoid using `_bucket` unless you know the bucket's boundaries are configured correctly +** Prefer individual averages like `rate(_sum) / rate(_count)` +** Avoid global averages like `sum(rate(_sum)) / sum(rate(_count))` because they hide data and are not generally informative +* ALWAYS embed the execution results into your answer if you use the data for your answer +* You only need to embed the partial result in your response. Include the `tool_name` and `random_key`. For example: << {tool_name: "execute_prometheus_range_query", random_key: "92jf2hf"} >> +* Post processing will parse your response, retrieve the tool output and create a chart +* Only generate and execute a prometheus query after checking what metrics are available with the `list_available_metrics` tool. Filter as needed +* You MUST get the current time before executing a prometheus range query + Style guide: * Reply with terse output. * Be painfully concise. 
diff --git a/holmes/plugins/toolsets/__init__.py b/holmes/plugins/toolsets/__init__.py index 9cc345a6..84f7ce8a 100644 --- a/holmes/plugins/toolsets/__init__.py +++ b/holmes/plugins/toolsets/__init__.py @@ -4,13 +4,15 @@ from typing import List, Optional from holmes.core.supabase_dal import SupabaseDal +from holmes.plugins.toolsets.datetime import DatetimeToolset from holmes.plugins.toolsets.robusta import RobustaToolset from holmes.plugins.toolsets.grafana.toolset_grafana_loki import GrafanaLokiToolset from holmes.plugins.toolsets.grafana.toolset_grafana_tempo import GrafanaTempoToolset from holmes.plugins.toolsets.internet import InternetToolset +from holmes.plugins.toolsets.opensearch import OpenSearchToolset +from holmes.plugins.toolsets.prometheus import PrometheusToolset from holmes.core.tools import Toolset, YAMLToolset -from holmes.plugins.toolsets.opensearch import OpenSearchToolset import yaml THIS_DIR = os.path.abspath(os.path.dirname(__file__)) @@ -46,6 +48,8 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]: OpenSearchToolset(), GrafanaLokiToolset(), GrafanaTempoToolset(), + PrometheusToolset(), + DatetimeToolset(), ] return toolsets diff --git a/holmes/plugins/toolsets/datetime.py b/holmes/plugins/toolsets/datetime.py new file mode 100644 index 00000000..6f37623c --- /dev/null +++ b/holmes/plugins/toolsets/datetime.py @@ -0,0 +1,33 @@ +from holmes.core.tools import ToolsetTag +from typing import Dict +from holmes.core.tools import Tool, Toolset +import datetime + + +class CurrentTime(Tool): + def __init__(self): + super().__init__( + name="get_current_time", + description="Return current time information. Useful to build queries that require a time information", + parameters={}, + ) + + def invoke(self, params: Dict) -> str: + now = datetime.datetime.now(datetime.timezone.utc) + return f"The current UTC date and time are {now}. The current UTC timestamp in seconds is {int(now.timestamp())}." 
+ + def get_parameterized_one_liner(self, params) -> str: + return "fetched current time" + + +class DatetimeToolset(Toolset): + def __init__(self): + super().__init__( + name="datetime", + enabled=True, + description="Current date and time information", + icon_url="https://platform.robusta.dev/demos/internet-access.svg", + prerequisites=[], + tools=[CurrentTime()], + tags=[ToolsetTag.CORE], + ) diff --git a/holmes/plugins/toolsets/prometheus.py b/holmes/plugins/toolsets/prometheus.py new file mode 100644 index 00000000..d0ef549b --- /dev/null +++ b/holmes/plugins/toolsets/prometheus.py @@ -0,0 +1,418 @@ +import os +import re +import logging +import random +import string +import time + +from typing import Any, Union, Optional + +import requests +from pydantic import BaseModel +from holmes.core.tools import ( + CallablePrerequisite, + Tool, + ToolParameter, + Toolset, + ToolsetTag, +) +import json +from requests import RequestException + +from urllib.parse import urljoin + +from holmes.utils.cache import TTLCache + +cache = None + + +class PrometheusConfig(BaseModel): + prometheus_url: Union[str, None] + # Setting to None will remove the time window from the request for labels + metrics_labels_time_window_hrs: Union[int, None] = 48 + # Setting to None will disable the cache + metrics_labels_cache_duration_hrs: Union[int, None] = 12 + + +class BasePrometheusTool(Tool): + toolset: "PrometheusToolset" + + +def generate_random_key(): + return "".join(random.choices(string.ascii_letters + string.digits, k=4)) + + +def filter_metrics_by_type(metrics: dict, expected_type: str): + return { + metric_name: metric_data + for metric_name, metric_data in metrics.items() + if metric_data.get("type") == expected_type + } + + +def filter_metrics_by_name(metrics: dict, pattern: str) -> dict: + regex = re.compile(pattern) + return { + metric_name: metric_data + for metric_name, metric_data in metrics.items() + if regex.search(metric_name) + } + + +def fetch_metadata(url: str) -> 
dict: + metadata_url = urljoin(url, "/api/v1/metadata") + metadata_response = requests.get(metadata_url, timeout=60, verify=True) + + metadata_response.raise_for_status() + + metadata = metadata_response.json()["data"] + return metadata + + +def fetch_metrics_labels( + prometheus_url: str, + cache: Optional[TTLCache], + metrics_labels_time_window_hrs: Union[int, None], +) -> dict: + """This is a slow query. Takes 5+ seconds to run""" + + if cache: + cached_result = cache.get("metrics_labels") + if cached_result: + logging.info("fetch_metrics_labels() result retrieved from cache") + return cached_result + + series_url = urljoin(prometheus_url, "/api/v1/series") + params: dict = { + "match[]": '{__name__!=""}', + } + if metrics_labels_time_window_hrs is not None: + params["end_time"] = int(time.time()) + params["start_time"] = params["end_time"] - ( + metrics_labels_time_window_hrs * 60 * 60 + ) + + series_response = requests.get( + url=series_url, params=params, timeout=60, verify=True + ) + series_response.raise_for_status() + series = series_response.json()["data"] + + metrics_labels: dict = {} + for serie in series: + metric_name = serie["__name__"] + # Add all labels except __name__ + labels = {k for k in serie.keys() if k != "__name__"} + if metric_name in metrics_labels: + metrics_labels[metric_name].update(labels) + else: + metrics_labels[metric_name] = labels + if cache: + cache.set("metrics_labels", metrics_labels) + return metrics_labels + + +def fetch_metrics( + url: str, + cache: Optional[TTLCache], + metrics_labels_time_window_hrs: Union[int, None], +) -> dict: + metadata = fetch_metadata(url) + metrics_labels = fetch_metrics_labels(url, cache, metrics_labels_time_window_hrs) + + metrics = {} + for metric_name, meta_list in metadata.items(): + if meta_list: + metric_type = meta_list[0].get("type", "unknown") + metric_description = meta_list[0].get("help", "unknown") + metrics[metric_name] = { + "type": metric_type, + "description": metric_description, + 
"labels": set(), + } + + for metric_name in metrics: + if metric_name in metrics_labels: + metrics[metric_name]["labels"] = metrics_labels[metric_name] + + return metrics + + +class ListAvailableMetrics(BasePrometheusTool): + def __init__(self, toolset: "PrometheusToolset"): + super().__init__( + name="list_available_metrics", + description="List all the available metrics to query from prometheus, including their types (counter, gauge, histogram, summary) and available labels.", + parameters={ + "type_filter": ToolParameter( + description="Optional filter to only return a specific metric type. Can be one of counter, gauge, histogram, summary", + type="string", + required=False, + ), + "name_filter": ToolParameter( + description="Only the metrics partially or fully matching this name will be returned", + type="string", + required=True, + ), + }, + toolset=toolset, + ) + self._cache = None + + def invoke(self, params: Any) -> str: + if not self.toolset.config or not self.toolset.config.prometheus_url: + return "Prometheus is not configured. Prometheus URL is missing" + if not self._cache and self.toolset.config.metrics_labels_cache_duration_hrs: + self._cache = TTLCache( + self.toolset.config.metrics_labels_cache_duration_hrs * 3600 + ) + try: + prometheus_url = self.toolset.config.prometheus_url + metrics_labels_time_window_hrs = ( + self.toolset.config.metrics_labels_time_window_hrs + ) + if not prometheus_url: + return "Prometheus is not configured. Prometheus URL is missing" + + name_filter = params.get("name_filter") + if not name_filter: + return "Error: cannot run tool 'list_available_metrics'. The param 'name_filter' is required but is missing." 
+ metrics = fetch_metrics( + prometheus_url, self._cache, metrics_labels_time_window_hrs + ) + + metrics = filter_metrics_by_name(metrics, name_filter) + + if params.get("type_filter"): + metrics = filter_metrics_by_type(metrics, params.get("type_filter")) + + output = ["Metric | Description | Type | Labels"] + output.append("-" * 100) + + for metric, info in sorted(metrics.items()): + labels_str = ( + ", ".join(sorted(info["labels"])) if info["labels"] else "none" + ) + output.append( + f"{metric} | {info['description']} | {info['type']} | {labels_str}" + ) + + table_output = "\n".join(output) + return table_output + + except requests.Timeout: + logging.warn("Timeout while fetching prometheus metrics", exc_info=True) + return "Request timed out while fetching metrics" + except RequestException as e: + logging.warn("Failed to fetch prometheus metrics", exc_info=True) + return f"Network error while fetching metrics: {str(e)}" + except Exception as e: + logging.warn("Failed to process prometheus metrics", exc_info=True) + return f"Unexpected error: {str(e)}" + + def get_parameterized_one_liner(self, params) -> str: + return f'list available prometheus metrics: name_filter="{params.get("name_filter", "")}", type_filter="{params.get("type_filter", "")}"' + + +class ExecuteQuery(BasePrometheusTool): + def __init__(self, toolset: "PrometheusToolset"): + super().__init__( + name="execute_prometheus_instant_query", + description="Execute an instant PromQL query", + parameters={ + "query": ToolParameter( + description="The PromQL query", + type="string", + required=True, + ), + "description": ToolParameter( + description="Describes the query", + type="string", + required=True, + ), + }, + toolset=toolset, + ) + + def invoke(self, params: Any) -> str: + if not self.toolset.config or not self.toolset.config.prometheus_url: + return "Prometheus is not configured. 
Prometheus URL is missing" + try: + query = params.get("query", "") + description = params.get("description", "") + + url = urljoin(self.toolset.config.prometheus_url, "/api/v1/query") + + payload = {"query": query} + + response = requests.post(url=url, data=payload, timeout=60) + + if response.status_code == 200: + data = response.json() + data["random_key"] = generate_random_key() + data["tool_name"] = self.name + data["description"] = description + data["query"] = query + data_str = json.dumps(data, indent=2) + return data_str + + # Handle known Prometheus error status codes + error_msg = "Unknown error occurred" + if response.status_code in [400, 429]: + try: + error_data = response.json() + error_msg = error_data.get( + "error", error_data.get("message", str(response.content)) + ) + except json.JSONDecodeError: + pass + return ( + f"Query execution failed. HTTP {response.status_code}: {error_msg}" + ) + + # For other status codes, just return the status code and content + return f"Query execution failed with unexpected status code: {response.status_code}. Response: {response.content}" + + except RequestException as e: + logging.info("Failed to connect to Prometheus", exc_info=True) + return f"Connection error to Prometheus: {str(e)}" + except Exception as e: + logging.info("Failed to connect to Prometheus", exc_info=True) + return f"Unexpected error executing query: {str(e)}" + + def get_parameterized_one_liner(self, params) -> str: + query = params.get("query") + description = params.get("description") + return f"Prometheus query. 
query={query}, description={description}" + + +class ExecuteRangeQuery(BasePrometheusTool): + def __init__(self, toolset: "PrometheusToolset"): + super().__init__( + name="execute_prometheus_range_query", + description="Execute a PromQL range query", + parameters={ + "query": ToolParameter( + description="The PromQL query", + type="string", + required=True, + ), + "description": ToolParameter( + description="Describes the query", + type="string", + required=True, + ), + "start": ToolParameter( + description="Start timestamp, inclusive. rfc3339 or unix_timestamp", + type="string", + required=True, + ), + "end": ToolParameter( + description="End timestamp, inclusive. rfc3339 or unix_timestamp", + type="string", + required=True, + ), + "step": ToolParameter( + description="Query resolution step width in duration format or float number of seconds", + type="number", + required=True, + ), + }, + toolset=toolset, + ) + + def invoke(self, params: Any) -> str: + if not self.toolset.config or not self.toolset.config.prometheus_url: + return "Prometheus is not configured. 
Prometheus URL is missing" + + try: + url = urljoin(self.toolset.config.prometheus_url, "/api/v1/query_range") + + query = params.get("query", "") + start = params.get("start", "") + end = params.get("end", "") + step = params.get("step", "") + description = params.get("description", "") + + payload = { + "query": query, + "start": start, + "end": end, + "step": step, + } + + response = requests.post(url=url, data=payload, timeout=120) + + if response.status_code == 200: + data = response.json() + + data["random_key"] = generate_random_key() + data["tool_name"] = self.name + data["description"] = description + data["query"] = query + data["start"] = start + data["end"] = end + data["step"] = step + data_str = json.dumps(data, indent=2) + return data_str + + error_msg = "Unknown error occurred" + if response.status_code in [400, 429]: + try: + error_data = response.json() + error_msg = error_data.get( + "error", error_data.get("message", str(response.content)) + ) + except json.JSONDecodeError: + pass + return ( + f"Query execution failed. HTTP {response.status_code}: {error_msg}" + ) + + return f"Query execution failed with unexpected status code: {response.status_code}. Response: {response.content}" + + except RequestException as e: + logging.info("Failed to connect to Prometheus", exc_info=True) + return f"Connection error to Prometheus: {str(e)}" + except Exception as e: + logging.info("Failed to connect to Prometheus", exc_info=True) + return f"Unexpected error executing query: {str(e)}" + + def get_parameterized_one_liner(self, params) -> str: + query = params.get("query") + start = params.get("start") + end = params.get("end") + step = params.get("step") + description = params.get("description") + return f"Prometheus query_range. 
query={query}, start={start}, end={end}, step={step}, description={description}" + + +class PrometheusToolset(Toolset): + def __init__(self): + super().__init__( + name="prometheus/metrics", + description="Prometheus integration to fetch metadata and execute PromQL queries", + docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/prometheus.html", + icon_url="https://upload.wikimedia.org/wikipedia/commons/3/38/Prometheus_software_logo.svg", + prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)], + tools=[ + ListAvailableMetrics(toolset=self), + ExecuteQuery(toolset=self), + ExecuteRangeQuery(toolset=self), + ], + tags=[ + ToolsetTag.CORE, + ], + ) + + def prerequisites_callable(self, config: dict[str, Any]) -> bool: + if not config and not os.environ.get("PROMETHEUS_URL", None): + return False + elif not config and os.environ.get("PROMETHEUS_URL", None): + self.config = PrometheusConfig( + prometheus_url=os.environ.get("PROMETHEUS_URL") + ) + return True + else: + self.config = PrometheusConfig(**config) + return True diff --git a/holmes/utils/cache.py b/holmes/utils/cache.py new file mode 100644 index 00000000..3554e13c --- /dev/null +++ b/holmes/utils/cache.py @@ -0,0 +1,84 @@ +import time +from threading import Timer +from typing import Any, Dict, Optional +import json +import bz2 + + +class SetEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, set): + return list(o) + return json.JSONEncoder.default(self, o) + + +def compress(data): + json_str = json.dumps(data, cls=SetEncoder) + json_bytes = json_str.encode("utf-8") + compressed = bz2.compress(json_bytes) + + return compressed + + +def decompress(compressed_data): + try: + decompressed = bz2.decompress(compressed_data) + json_str = decompressed.decode("utf-8") + data = json.loads(json_str) + return data + except Exception as e: + raise Exception(f"Decompression failed: {str(e)}") + + +class TTLCache: + def __init__(self, ttl_seconds: int): + 
self._cache: Dict[str, Dict[str, Any]] = {} + self._ttl = ttl_seconds + self._evict_interval = max(self._ttl / 10, 60) + self._evict_timer = None + self._start_evict_timer() + + def _start_evict_timer(self): + self._evict_timer = Timer(self._evict_interval, self._evict) + self._evict_timer.daemon = ( + True # Allow the program to exit even if timer is alive + ) + self._evict_timer.start() + + def _evict(self): + current_time = time.time() + expired_keys = [ + key for key, item in self._cache.items() if item["expiry"] <= current_time + ] + + for key in expired_keys: + del self._cache[key] + + self._start_evict_timer() + + def set(self, key: str, value: Any) -> None: + expiry = time.time() + self._ttl + + self._cache[key] = {"value": compress(value), "expiry": expiry} + + def get(self, key: str) -> Optional[Any]: + item = self._cache.get(key) + + if item is None: + return None + + if item["expiry"] <= time.time(): + del self._cache[key] + return None + + return decompress(item["value"]) + + def delete(self, key: str) -> None: + self._cache.pop(key, None) + + def clear(self) -> None: + self._cache.clear() + + def __del__(self): + if self._evict_timer: + self._evict_timer.cancel() diff --git a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt new file mode 100644 index 00000000..7bab3f26 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt @@ -0,0 +1,1270 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"*","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "namespace": "argocd" + }, + "values": [ + [ + 1738162001, + "782409728" + ], + [ + 1738162031, + "782692352" + ], + [ + 
1738162061, + "782700544" + ], + [ + 1738162091, + "782426112" + ], + [ + 1738162121, + "782155776" + ], + [ + 1738162151, + "782401536" + ], + [ + 1738162181, + "782143488" + ], + [ + 1738162211, + "782139392" + ], + [ + 1738162241, + "782143488" + ], + [ + 1738162271, + "782168064" + ], + [ + 1738162301, + "782426112" + ], + [ + 1738162331, + "782430208" + ], + [ + 1738162361, + "782688256" + ], + [ + 1738162391, + "782712832" + ], + [ + 1738162421, + "782159872" + ], + [ + 1738162451, + "782434304" + ], + [ + 1738162481, + "782422016" + ], + [ + 1738162511, + "782172160" + ], + [ + 1738162541, + "782163968" + ], + [ + 1738162571, + "782430208" + ], + [ + 1738162601, + "782151680" + ], + [ + 1738162631, + "782422016" + ], + [ + 1738162661, + "782434304" + ], + [ + 1738162691, + "782409728" + ], + [ + 1738162721, + "782143488" + ], + [ + 1738162751, + "782442496" + ], + [ + 1738162781, + "782213120" + ], + [ + 1738162811, + "782434304" + ], + [ + 1738162841, + "782446592" + ], + [ + 1738162871, + "782172160" + ], + [ + 1738162901, + "782458880" + ], + [ + 1738162931, + "782692352" + ], + [ + 1738162961, + "782688256" + ], + [ + 1738162991, + "782184448" + ], + [ + 1738163021, + "782446592" + ], + [ + 1738163051, + "782696448" + ], + [ + 1738163081, + "782430208" + ], + [ + 1738163111, + "781275136" + ], + [ + 1738163141, + "780996608" + ], + [ + 1738163171, + "780992512" + ], + [ + 1738163201, + "780726272" + ], + [ + 1738163231, + "780750848" + ], + [ + 1738163261, + "780742656" + ], + [ + 1738163291, + "781262848" + ], + [ + 1738163321, + "780750848" + ], + [ + 1738163351, + "780750848" + ], + [ + 1738163381, + "781017088" + ], + [ + 1738163411, + "781004800" + ], + [ + 1738163441, + "781004800" + ], + [ + 1738163471, + "781037568" + ], + [ + 1738163501, + "781029376" + ], + [ + 1738163531, + "780767232" + ], + [ + 1738163561, + "780759040" + ], + [ + 1738163591, + "781037568" + ], + [ + 1738163621, + "781045760" + ], + [ + 1738163651, + "781570048" + ], + [ + 
1738163681, + "781041664" + ], + [ + 1738163711, + "781058048" + ], + [ + 1738163741, + "781021184" + ], + [ + 1738163771, + "781033472" + ], + [ + 1738163801, + "780759040" + ] + ] + }, + { + "metric": { + "namespace": "default" + }, + "values": [ + [ + 1738162001, + "14914916352" + ], + [ + 1738162031, + "14920581120" + ], + [ + 1738162061, + "14959681536" + ], + [ + 1738162091, + "14967799808" + ], + [ + 1738162121, + "14976086016" + ], + [ + 1738162151, + "14969462784" + ], + [ + 1738162181, + "14942916608" + ], + [ + 1738162211, + "14932123648" + ], + [ + 1738162241, + "14990041088" + ], + [ + 1738162271, + "15017783296" + ], + [ + 1738162301, + "14981828608" + ], + [ + 1738162331, + "15013658624" + ], + [ + 1738162361, + "15031767040" + ], + [ + 1738162391, + "15024631808" + ], + [ + 1738162421, + "15031906304" + ], + [ + 1738162451, + "15011442688" + ], + [ + 1738162481, + "15015374848" + ], + [ + 1738162511, + "15028559872" + ], + [ + 1738162541, + "15039492096" + ], + [ + 1738162571, + "14992990208" + ], + [ + 1738162601, + "14985097216" + ], + [ + 1738162631, + "14942883840" + ], + [ + 1738162661, + "15208288256" + ], + [ + 1738162691, + "15175557120" + ], + [ + 1738162721, + "15177445376" + ], + [ + 1738162751, + "15179718656" + ], + [ + 1738162781, + "16764833792" + ], + [ + 1738162811, + "16750112768" + ], + [ + 1738162841, + "16324071424" + ], + [ + 1738162871, + "17035313152" + ], + [ + 1738162901, + "16249901056" + ], + [ + 1738162931, + "15430139904" + ], + [ + 1738162961, + "15603331072" + ], + [ + 1738162991, + "15855284224" + ], + [ + 1738163021, + "15034163200" + ], + [ + 1738163051, + "14576488448" + ], + [ + 1738163081, + "14605860864" + ], + [ + 1738163111, + "14632443904" + ], + [ + 1738163141, + "14638026752" + ], + [ + 1738163171, + "15656050688" + ], + [ + 1738163201, + "15687675904" + ], + [ + 1738163231, + "15727095808" + ], + [ + 1738163261, + "15859519488" + ], + [ + 1738163291, + "15273623552" + ], + [ + 
1738163321, + "15244197888" + ], + [ + 1738163351, + "14565888000" + ], + [ + 1738163381, + "15233122304" + ], + [ + 1738163411, + "15021203456" + ], + [ + 1738163441, + "14451933184" + ], + [ + 1738163471, + "14449156096" + ], + [ + 1738163501, + "14472785920" + ], + [ + 1738163531, + "14520791040" + ], + [ + 1738163561, + "14470537216" + ], + [ + 1738163591, + "14452244480" + ], + [ + 1738163621, + "13654368256" + ], + [ + 1738163651, + "13661114368" + ], + [ + 1738163681, + "13604032512" + ], + [ + 1738163711, + "13678948352" + ], + [ + 1738163741, + "13712158720" + ], + [ + 1738163771, + "13720158208" + ], + [ + 1738163801, + "13677768704" + ] + ] + }, + { + "metric": { + "namespace": "kube-system" + }, + "values": [ + [ + 1738162001, + "3344011264" + ], + [ + 1738162031, + "3336151040" + ], + [ + 1738162061, + "3334979584" + ], + [ + 1738162091, + "3334868992" + ], + [ + 1738162121, + "3349721088" + ], + [ + 1738162151, + "3338203136" + ], + [ + 1738162181, + "3339788288" + ], + [ + 1738162211, + "3353309184" + ], + [ + 1738162241, + "3352088576" + ], + [ + 1738162271, + "3349053440" + ], + [ + 1738162301, + "3362312192" + ], + [ + 1738162331, + "3372867584" + ], + [ + 1738162361, + "3374501888" + ], + [ + 1738162391, + "3381321728" + ], + [ + 1738162421, + "3297435648" + ], + [ + 1738162451, + "3297333248" + ], + [ + 1738162481, + "3319226368" + ], + [ + 1738162511, + "3325181952" + ], + [ + 1738162541, + "3301502976" + ], + [ + 1738162571, + "3415863296" + ], + [ + 1738162601, + "3416260608" + ], + [ + 1738162631, + "3415916544" + ], + [ + 1738162661, + "3383025664" + ], + [ + 1738162691, + "3383173120" + ], + [ + 1738162721, + "3384684544" + ], + [ + 1738162751, + "3391975424" + ], + [ + 1738162781, + "3342675968" + ], + [ + 1738162811, + "3345620992" + ], + [ + 1738162841, + "3366748160" + ], + [ + 1738162871, + "3367297024" + ], + [ + 1738162901, + "3363966976" + ], + [ + 1738162931, + "3370590208" + ], + [ + 1738162961, + "3373965312" + ], + [ + 
1738162991, + "3374338048" + ], + [ + 1738163021, + "3376738304" + ], + [ + 1738163051, + "3317305344" + ], + [ + 1738163081, + "3317108736" + ], + [ + 1738163111, + "3332677632" + ], + [ + 1738163141, + "3334553600" + ], + [ + 1738163171, + "3367936000" + ], + [ + 1738163201, + "3381387264" + ], + [ + 1738163231, + "3380572160" + ], + [ + 1738163261, + "3376762880" + ], + [ + 1738163291, + "3379228672" + ], + [ + 1738163321, + "3302445056" + ], + [ + 1738163351, + "3302092800" + ], + [ + 1738163381, + "3320926208" + ], + [ + 1738163411, + "3321548800" + ], + [ + 1738163441, + "3311190016" + ], + [ + 1738163471, + "3440578560" + ], + [ + 1738163501, + "3441373184" + ], + [ + 1738163531, + "3442544640" + ], + [ + 1738163561, + "3447529472" + ], + [ + 1738163591, + "3392344064" + ], + [ + 1738163621, + "3393798144" + ], + [ + 1738163651, + "3399401472" + ], + [ + 1738163681, + "3401732096" + ], + [ + 1738163711, + "3298332672" + ], + [ + 1738163741, + "3311722496" + ], + [ + 1738163771, + "3358076928" + ], + [ + 1738163801, + "3360256000" + ] + ] + }, + { + "metric": { + "namespace": "local-path-storage" + }, + "values": [ + [ + 1738162001, + "81846272" + ], + [ + 1738162031, + "81846272" + ], + [ + 1738162061, + "81846272" + ], + [ + 1738162091, + "81846272" + ], + [ + 1738162121, + "81846272" + ], + [ + 1738162151, + "81846272" + ], + [ + 1738162181, + "81846272" + ], + [ + 1738162211, + "81846272" + ], + [ + 1738162241, + "81846272" + ], + [ + 1738162271, + "81846272" + ], + [ + 1738162301, + "81846272" + ], + [ + 1738162331, + "81846272" + ], + [ + 1738162361, + "81846272" + ], + [ + 1738162391, + "81846272" + ], + [ + 1738162421, + "81846272" + ], + [ + 1738162451, + "81846272" + ], + [ + 1738162481, + "81846272" + ], + [ + 1738162511, + "81846272" + ], + [ + 1738162541, + "81846272" + ], + [ + 1738162571, + "81846272" + ], + [ + 1738162601, + "81846272" + ], + [ + 1738162631, + "81846272" + ], + [ + 1738162661, + "81846272" + ], + [ + 1738162691, + "81846272" + 
], + [ + 1738162721, + "81846272" + ], + [ + 1738162751, + "81846272" + ], + [ + 1738162781, + "81846272" + ], + [ + 1738162811, + "81846272" + ], + [ + 1738162841, + "81846272" + ], + [ + 1738162871, + "81846272" + ], + [ + 1738162901, + "81846272" + ], + [ + 1738162931, + "81846272" + ], + [ + 1738162961, + "81846272" + ], + [ + 1738162991, + "81846272" + ], + [ + 1738163021, + "81846272" + ], + [ + 1738163051, + "81846272" + ], + [ + 1738163081, + "81846272" + ], + [ + 1738163111, + "81846272" + ], + [ + 1738163141, + "81846272" + ], + [ + 1738163171, + "81850368" + ], + [ + 1738163201, + "81846272" + ], + [ + 1738163231, + "81846272" + ], + [ + 1738163261, + "81846272" + ], + [ + 1738163291, + "81846272" + ], + [ + 1738163321, + "82108416" + ], + [ + 1738163351, + "81846272" + ], + [ + 1738163381, + "81846272" + ], + [ + 1738163411, + "81846272" + ], + [ + 1738163441, + "81846272" + ], + [ + 1738163471, + "81846272" + ], + [ + 1738163501, + "81846272" + ], + [ + 1738163531, + "81846272" + ], + [ + 1738163561, + "81846272" + ], + [ + 1738163591, + "81846272" + ], + [ + 1738163621, + "81846272" + ], + [ + 1738163651, + "81846272" + ], + [ + 1738163681, + "81846272" + ], + [ + 1738163711, + "81846272" + ], + [ + 1738163741, + "81846272" + ], + [ + 1738163771, + "81846272" + ], + [ + 1738163801, + "81846272" + ] + ] + }, + { + "metric": { + "namespace": "sock-shop" + }, + "values": [ + [ + 1738162001, + "6530822144" + ], + [ + 1738162031, + "6530859008" + ], + [ + 1738162061, + "6530666496" + ], + [ + 1738162091, + "6530818048" + ], + [ + 1738162121, + "6531596288" + ], + [ + 1738162151, + "6531211264" + ], + [ + 1738162181, + "6530506752" + ], + [ + 1738162211, + "6531481600" + ], + [ + 1738162241, + "6531796992" + ], + [ + 1738162271, + "6531776512" + ], + [ + 1738162301, + "6532218880" + ], + [ + 1738162331, + "6532534272" + ], + [ + 1738162361, + "6531362816" + ], + [ + 1738162391, + "6531465216" + ], + [ + 1738162421, + "6531506176" + ], + [ + 1738162451, + 
"6531444736" + ], + [ + 1738162481, + "6531563520" + ], + [ + 1738162511, + "6529724416" + ], + [ + 1738162541, + "6529998848" + ], + [ + 1738162571, + "6530473984" + ], + [ + 1738162601, + "6530330624" + ], + [ + 1738162631, + "6530359296" + ], + [ + 1738162661, + "6531346432" + ], + [ + 1738162691, + "6531612672" + ], + [ + 1738162721, + "6531342336" + ], + [ + 1738162751, + "6531026944" + ], + [ + 1738162781, + "6531420160" + ], + [ + 1738162811, + "6531809280" + ], + [ + 1738162841, + "6532534272" + ], + [ + 1738162871, + "6533292032" + ], + [ + 1738162901, + "6532567040" + ], + [ + 1738162931, + "6533378048" + ], + [ + 1738162961, + "6533582848" + ], + [ + 1738162991, + "6534295552" + ], + [ + 1738163021, + "6533734400" + ], + [ + 1738163051, + "6534025216" + ], + [ + 1738163081, + "6534119424" + ], + [ + 1738163111, + "6534279168" + ], + [ + 1738163141, + "6535139328" + ], + [ + 1738163171, + "6534729728" + ], + [ + 1738163201, + "6534250496" + ], + [ + 1738163231, + "6536454144" + ], + [ + 1738163261, + "6535843840" + ], + [ + 1738163291, + "6536699904" + ], + [ + 1738163321, + "6536892416" + ], + [ + 1738163351, + "6537519104" + ], + [ + 1738163381, + "6537195520" + ], + [ + 1738163411, + "6537035776" + ], + [ + 1738163441, + "6536896512" + ], + [ + 1738163471, + "6537318400" + ], + [ + 1738163501, + "6537957376" + ], + [ + 1738163531, + "6538096640" + ], + [ + 1738163561, + "6538461184" + ], + [ + 1738163591, + "6537748480" + ], + [ + 1738163621, + "6538604544" + ], + [ + 1738163651, + "6540648448" + ], + [ + 1738163681, + "6539235328" + ], + [ + 1738163711, + "6539284480" + ], + [ + 1738163741, + "6539341824" + ], + [ + 1738163771, + "6540587008" + ], + [ + 1738163801, + "6540685312" + ] + ] + } + ] + }, + "random_key": "AtEJ", + "tool_name": "execute_prometheus_range_query", + "start": "2025-01-29T14:46:41Z", + "end": "2025-01-29T15:16:41Z", + "step": 30, + "description": "Memory usage for the Kubernetes cluster" +} diff --git 
a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/get_current_time.txt new file mode 100644 index 00000000..f3fb4f6a --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/get_current_time.txt @@ -0,0 +1,2 @@ +{"toolset_name":"datetime","tool_name":"get_current_time","match_params":{}} +The current UTC date and time are 2025-01-29 15:16:41.224942. The current UTC timestamp in seconds is 1738163801. diff --git a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/list_available_metrics.txt new file mode 100644 index 00000000..928d7ff6 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/list_available_metrics.txt @@ -0,0 +1,95 @@ +{"toolset_name":"prometheus/metrics","tool_name":"list_available_metrics","match_params":{}} +Metric | Type | Description | Labels +---------------------------------------------------------------------------------------------------- +container_memory_cache | gauge | Number of bytes of page cache memory. | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service +container_memory_failcnt | counter | Number of memory usage hits limits | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service +container_memory_failures_total | counter | Cumulative count of memory allocation failures. | container, endpoint, failure_type, id, image, instance, job, metrics_path, name, namespace, node, pod, scope, service +container_memory_kernel_usage | gauge | Size of kernel memory allocated in bytes. 
| container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service +container_memory_mapped_file | gauge | Size of memory mapped files in bytes. | none +container_memory_max_usage_bytes | gauge | Maximum memory usage recorded in bytes | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service +container_memory_rss | gauge | Size of RSS in bytes. | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service +container_memory_swap | gauge | Container swap usage in bytes. | none +container_memory_usage_bytes | gauge | Current memory usage in bytes, including all memory regardless of when it was accessed | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service +container_memory_working_set_bytes | gauge | Current working set in bytes. | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service +container_spec_memory_limit_bytes | gauge | Memory limit for the container. | none +container_spec_memory_reservation_limit_bytes | gauge | Memory reservation limit for the container. | none +container_spec_memory_swap_limit_bytes | gauge | Memory swap limit for the container. | none +go_memory_classes_heap_free_bytes | gauge | Memory that is completely free and eligible to be returned to the underlying system, but has not been. This metric is the runtime's estimate of free address space that is backed by physical memory. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_heap_objects_bytes | gauge | Memory occupied by live objects and dead objects that have not yet been marked free by the garbage collector. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_heap_released_bytes | gauge | Memory that is completely free and has been returned to the underlying system. 
This metric is the runtime's estimate of free address space that is still mapped into the process, but is not backed by physical memory. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_heap_stacks_bytes | gauge | Memory allocated from the heap that is reserved for stack space, whether or not it is currently in-use. Currently, this represents all stack memory for goroutines. It also includes all OS thread stacks in non-cgo programs. Note that stacks may be allocated differently in the future, and this may change. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_heap_unused_bytes | gauge | Memory that is reserved for heap objects but is not currently used to hold heap objects. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_metadata_mcache_free_bytes | gauge | Memory that is reserved for runtime mcache structures, but not in-use. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_metadata_mcache_inuse_bytes | gauge | Memory that is occupied by runtime mcache structures that are currently being used. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_metadata_mspan_free_bytes | gauge | Memory that is reserved for runtime mspan structures, but not in-use. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_metadata_mspan_inuse_bytes | gauge | Memory that is occupied by runtime mspan structures that are currently being used. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_metadata_other_bytes | gauge | Memory that is reserved for or used to hold runtime metadata. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_os_stacks_bytes | gauge | Stack memory allocated by the underlying operating system. In non-cgo programs this metric is currently zero. 
This may change in the future.In cgo programs this metric includes OS thread stacks allocated directly from the OS. Currently, this only accounts for one stack in c-shared and c-archive build modes, and other sources of stacks from the OS are not measured. This too may change in the future. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_other_bytes | gauge | Memory used by execution trace buffers, structures for debugging the runtime, finalizer and profiler specials, and more. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_profiling_buckets_bytes | gauge | Memory that is used by the stack trace hash map used for profiling. | endpoint, instance, job, metrics_path, namespace, node, service +go_memory_classes_total_bytes | gauge | All memory mapped by the Go runtime into the current process as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes. | endpoint, instance, job, metrics_path, namespace, node, service +kube_pod_overhead_memory_bytes | gauge | The pod overhead in regards to memory associated with running a pod. | none +kubelet_memory_manager_pinning_errors_total | counter | [ALPHA] The number of memory pages allocations which required pinning that failed. | endpoint, instance, job, metrics_path, namespace, node, service +kubelet_memory_manager_pinning_requests_total | counter | [ALPHA] The number of memory pages allocations which required pinning. | endpoint, instance, job, metrics_path, namespace, node, service +machine_memory_bytes | gauge | Amount of memory installed on the machine. | boot_id, endpoint, instance, job, machine_id, metrics_path, namespace, node, service, system_uuid +node_memory_Active_anon_bytes | gauge | Memory information field Active_anon_bytes. 
| container, endpoint, instance, job, namespace, pod, service +node_memory_Active_bytes | gauge | Memory information field Active_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Active_file_bytes | gauge | Memory information field Active_file_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_AnonHugePages_bytes | gauge | Memory information field AnonHugePages_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_AnonPages_bytes | gauge | Memory information field AnonPages_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Bounce_bytes | gauge | Memory information field Bounce_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Buffers_bytes | gauge | Memory information field Buffers_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Cached_bytes | gauge | Memory information field Cached_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_CommitLimit_bytes | gauge | Memory information field CommitLimit_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Committed_AS_bytes | gauge | Memory information field Committed_AS_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_DirectMap1G_bytes | gauge | Memory information field DirectMap1G_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_DirectMap2M_bytes | gauge | Memory information field DirectMap2M_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_DirectMap4k_bytes | gauge | Memory information field DirectMap4k_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Dirty_bytes | gauge | Memory information field Dirty_bytes. 
| container, endpoint, instance, job, namespace, pod, service +node_memory_FileHugePages_bytes | gauge | Memory information field FileHugePages_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_FilePmdMapped_bytes | gauge | Memory information field FilePmdMapped_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_HardwareCorrupted_bytes | gauge | Memory information field HardwareCorrupted_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_HugePages_Free | gauge | Memory information field HugePages_Free. | container, endpoint, instance, job, namespace, pod, service +node_memory_HugePages_Rsvd | gauge | Memory information field HugePages_Rsvd. | container, endpoint, instance, job, namespace, pod, service +node_memory_HugePages_Surp | gauge | Memory information field HugePages_Surp. | container, endpoint, instance, job, namespace, pod, service +node_memory_HugePages_Total | gauge | Memory information field HugePages_Total. | container, endpoint, instance, job, namespace, pod, service +node_memory_Hugepagesize_bytes | gauge | Memory information field Hugepagesize_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Hugetlb_bytes | gauge | Memory information field Hugetlb_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Inactive_anon_bytes | gauge | Memory information field Inactive_anon_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Inactive_bytes | gauge | Memory information field Inactive_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Inactive_file_bytes | gauge | Memory information field Inactive_file_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_KReclaimable_bytes | gauge | Memory information field KReclaimable_bytes. 
| container, endpoint, instance, job, namespace, pod, service +node_memory_KernelStack_bytes | gauge | Memory information field KernelStack_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Mapped_bytes | gauge | Memory information field Mapped_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_MemAvailable_bytes | gauge | Memory information field MemAvailable_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_MemFree_bytes | gauge | Memory information field MemFree_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_MemTotal_bytes | gauge | Memory information field MemTotal_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Mlocked_bytes | gauge | Memory information field Mlocked_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_NFS_Unstable_bytes | gauge | Memory information field NFS_Unstable_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_PageTables_bytes | gauge | Memory information field PageTables_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Percpu_bytes | gauge | Memory information field Percpu_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_SReclaimable_bytes | gauge | Memory information field SReclaimable_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_SUnreclaim_bytes | gauge | Memory information field SUnreclaim_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_SecPageTables_bytes | gauge | Memory information field SecPageTables_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_ShmemHugePages_bytes | gauge | Memory information field ShmemHugePages_bytes. 
| container, endpoint, instance, job, namespace, pod, service +node_memory_ShmemPmdMapped_bytes | gauge | Memory information field ShmemPmdMapped_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Shmem_bytes | gauge | Memory information field Shmem_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Slab_bytes | gauge | Memory information field Slab_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_SwapCached_bytes | gauge | Memory information field SwapCached_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_SwapFree_bytes | gauge | Memory information field SwapFree_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_SwapTotal_bytes | gauge | Memory information field SwapTotal_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Unevictable_bytes | gauge | Memory information field Unevictable_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_VmallocChunk_bytes | gauge | Memory information field VmallocChunk_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_VmallocTotal_bytes | gauge | Memory information field VmallocTotal_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_VmallocUsed_bytes | gauge | Memory information field VmallocUsed_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_WritebackTmp_bytes | gauge | Memory information field WritebackTmp_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Writeback_bytes | gauge | Memory information field Writeback_bytes. | container, endpoint, instance, job, namespace, pod, service +node_memory_Zswap_bytes | gauge | Memory information field Zswap_bytes. 
| container, endpoint, instance, job, namespace, pod, service +node_memory_Zswapped_bytes | gauge | Memory information field Zswapped_bytes. | container, endpoint, instance, job, namespace, pod, service +node_pressure_memory_stalled_seconds_total | counter | Total time in seconds no process could make progress due to memory congestion | container, endpoint, instance, job, namespace, pod, service +node_pressure_memory_waiting_seconds_total | counter | Total time in seconds that processes have waited for memory | container, endpoint, instance, job, namespace, pod, service +node_sockstat_FRAG6_memory | gauge | Number of FRAG6 sockets in state memory. | container, endpoint, instance, job, namespace, pod, service +node_sockstat_FRAG_memory | gauge | Number of FRAG sockets in state memory. | container, endpoint, instance, job, namespace, pod, service +process_resident_memory_bytes | gauge | Resident memory size in bytes. | container, endpoint, instance, job, metrics_path, namespace, node, pod, service, target +process_virtual_memory_bytes | gauge | Virtual memory size in bytes. | container, endpoint, instance, job, metrics_path, namespace, node, pod, service, target +process_virtual_memory_max_bytes | gauge | Maximum amount of virtual memory available in bytes. | container, endpoint, instance, job, metrics_path, namespace, node, pod, service diff --git a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/test_case.yaml new file mode 100644 index 00000000..89a1fb3e --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/test_case.yaml @@ -0,0 +1,5 @@ +user_prompt: "Show me a graph of the memory usage for the kubernetes cluster over the last 30 minutes" +expected_output: + - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "AtEJ" } >>. 
Both tool_name and random_key MUST absolutely be present and the same as mentioned here.' +evaluation: + correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt new file mode 100644 index 00000000..51337603 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt @@ -0,0 +1,801 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"container_memory_working_set_bytes{pod=\"robusta-holmes-7956c6bbc7-v5krl\",namespace=\"default\"}","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "__name__": "container_memory_working_set_bytes", + "container": "holmes", + "endpoint": "https-metrics", + "id": "/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-pod97941d0e_b5af_47a5_bfa4_201b8642d9ee.slice/cri-containerd-5f781194d78dc0b425f67ffee3922e71f9d89251ed6a33e9624b2bd8defe7cf5.scope", + "image": "us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes:nicolas_test", + "instance": "172.18.0.2:10250", + "job": "kubelet", + "metrics_path": "/metrics/cadvisor", + "name": "5f781194d78dc0b425f67ffee3922e71f9d89251ed6a33e9624b2bd8defe7cf5", + "namespace": "default", + "node": "grafana-cloud-control-plane", + "pod": "robusta-holmes-7956c6bbc7-v5krl", + "service": "robusta-kube-prometheus-st-kubelet" + }, + "values": [ + [ + 1739973438, + "330829824" + ], + [ + 1739973468, + "330825728" + ], + [ + 1739973498, + "330829824" + ], + [ + 1739973528, + "330829824" + ], + [ + 1739973558, + "330829824" + ], + [ + 1739973588, + "330829824" + ], + [ + 1739973618, + "330833920" + ], + [ + 1739973648, + "330833920" + ], + [ + 1739973678, + 
"330829824" + ], + [ + 1739973708, + "330829824" + ], + [ + 1739973738, + "330833920" + ], + [ + 1739973768, + "330833920" + ], + [ + 1739973798, + "330829824" + ], + [ + 1739973828, + "330838016" + ], + [ + 1739973858, + "332267520" + ], + [ + 1739973888, + "333791232" + ], + [ + 1739973918, + "333791232" + ], + [ + 1739973948, + "334630912" + ], + [ + 1739973978, + "337260544" + ], + [ + 1739974008, + "339595264" + ], + [ + 1739974038, + "339845120" + ], + [ + 1739974068, + "339853312" + ], + [ + 1739974098, + "339853312" + ], + [ + 1739974128, + "339845120" + ], + [ + 1739974158, + "339849216" + ], + [ + 1739974188, + "339849216" + ], + [ + 1739974218, + "339845120" + ], + [ + 1739974248, + "339845120" + ], + [ + 1739974278, + "339849216" + ], + [ + 1739974308, + "339845120" + ], + [ + 1739974338, + "339845120" + ], + [ + 1739974368, + "339845120" + ], + [ + 1739974398, + "339849216" + ], + [ + 1739974428, + "339849216" + ], + [ + 1739974458, + "339845120" + ], + [ + 1739974488, + "339857408" + ], + [ + 1739974518, + "339857408" + ], + [ + 1739974548, + "339857408" + ], + [ + 1739974578, + "339853312" + ], + [ + 1739974608, + "339861504" + ], + [ + 1739974638, + "339861504" + ], + [ + 1739974668, + "339853312" + ], + [ + 1739974698, + "339853312" + ], + [ + 1739974728, + "339853312" + ], + [ + 1739974758, + "339853312" + ], + [ + 1739974788, + "339853312" + ], + [ + 1739974818, + "339861504" + ], + [ + 1739974848, + "339861504" + ], + [ + 1739974878, + "339853312" + ], + [ + 1739974908, + "339853312" + ], + [ + 1739974938, + "339857408" + ], + [ + 1739974968, + "339857408" + ], + [ + 1739974998, + "339853312" + ], + [ + 1739975028, + "339865600" + ], + [ + 1739975058, + "339865600" + ], + [ + 1739975088, + "339861504" + ], + [ + 1739975118, + "339861504" + ], + [ + 1739975148, + "339861504" + ], + [ + 1739975178, + "339865600" + ], + [ + 1739975208, + "339861504" + ], + [ + 1739975238, + "339861504" + ] + ] + }, + { + "metric": { + "__name__": 
"container_memory_working_set_bytes", + "endpoint": "https-metrics", + "id": "/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-pod97941d0e_b5af_47a5_bfa4_201b8642d9ee.slice", + "instance": "172.18.0.2:10250", + "job": "kubelet", + "metrics_path": "/metrics/cadvisor", + "namespace": "default", + "node": "grafana-cloud-control-plane", + "pod": "robusta-holmes-7956c6bbc7-v5krl", + "service": "robusta-kube-prometheus-st-kubelet" + }, + "values": [ + [ + 1739973438, + "331214848" + ], + [ + 1739973468, + "331210752" + ], + [ + 1739973498, + "331214848" + ], + [ + 1739973528, + "331214848" + ], + [ + 1739973558, + "331214848" + ], + [ + 1739973588, + "331214848" + ], + [ + 1739973618, + "331218944" + ], + [ + 1739973648, + "331218944" + ], + [ + 1739973678, + "331214848" + ], + [ + 1739973708, + "331214848" + ], + [ + 1739973738, + "331218944" + ], + [ + 1739973768, + "331218944" + ], + [ + 1739973798, + "331214848" + ], + [ + 1739973828, + "331223040" + ], + [ + 1739973858, + "332652544" + ], + [ + 1739973888, + "334176256" + ], + [ + 1739973918, + "334925824" + ], + [ + 1739973948, + "335015936" + ], + [ + 1739973978, + "339984384" + ], + [ + 1739974008, + "340000768" + ], + [ + 1739974038, + "340230144" + ], + [ + 1739974068, + "340238336" + ], + [ + 1739974098, + "340238336" + ], + [ + 1739974128, + "340230144" + ], + [ + 1739974158, + "340234240" + ], + [ + 1739974188, + "340234240" + ], + [ + 1739974218, + "340230144" + ], + [ + 1739974248, + "340230144" + ], + [ + 1739974278, + "340234240" + ], + [ + 1739974308, + "340234240" + ], + [ + 1739974338, + "340230144" + ], + [ + 1739974368, + "340230144" + ], + [ + 1739974398, + "340234240" + ], + [ + 1739974428, + "340230144" + ], + [ + 1739974458, + "340230144" + ], + [ + 1739974488, + "340242432" + ], + [ + 1739974518, + "340242432" + ], + [ + 1739974548, + "340238336" + ], + [ + 1739974578, + "340238336" + ], + [ + 1739974608, + "340246528" + ], + [ + 1739974638, + 
"340246528" + ], + [ + 1739974668, + "340238336" + ], + [ + 1739974698, + "340238336" + ], + [ + 1739974728, + "340238336" + ], + [ + 1739974758, + "340238336" + ], + [ + 1739974788, + "340238336" + ], + [ + 1739974818, + "340246528" + ], + [ + 1739974848, + "340246528" + ], + [ + 1739974878, + "340238336" + ], + [ + 1739974908, + "340238336" + ], + [ + 1739974938, + "340242432" + ], + [ + 1739974968, + "340242432" + ], + [ + 1739974998, + "340238336" + ], + [ + 1739975028, + "340250624" + ], + [ + 1739975058, + "340250624" + ], + [ + 1739975088, + "340250624" + ], + [ + 1739975118, + "340246528" + ], + [ + 1739975148, + "340250624" + ], + [ + 1739975178, + "340250624" + ], + [ + 1739975208, + "340250624" + ], + [ + 1739975238, + "340246528" + ] + ] + }, + { + "metric": { + "__name__": "container_memory_working_set_bytes", + "endpoint": "https-metrics", + "id": "/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-pod97941d0e_b5af_47a5_bfa4_201b8642d9ee.slice/cri-containerd-b7185571ac94695057fb0fb47fb518454f7ebaf569ac79c025f5045916e2232e.scope", + "image": "registry.k8s.io/pause:3.10", + "instance": "172.18.0.2:10250", + "job": "kubelet", + "metrics_path": "/metrics/cadvisor", + "name": "b7185571ac94695057fb0fb47fb518454f7ebaf569ac79c025f5045916e2232e", + "namespace": "default", + "node": "grafana-cloud-control-plane", + "pod": "robusta-holmes-7956c6bbc7-v5krl", + "service": "robusta-kube-prometheus-st-kubelet" + }, + "values": [ + [ + 1739973438, + "212992" + ], + [ + 1739973468, + "212992" + ], + [ + 1739973498, + "212992" + ], + [ + 1739973528, + "212992" + ], + [ + 1739973558, + "212992" + ], + [ + 1739973588, + "212992" + ], + [ + 1739973618, + "212992" + ], + [ + 1739973648, + "212992" + ], + [ + 1739973678, + "212992" + ], + [ + 1739973708, + "212992" + ], + [ + 1739973738, + "212992" + ], + [ + 1739973768, + "212992" + ], + [ + 1739973798, + "212992" + ], + [ + 1739973828, + "212992" + ], + [ + 1739973858, + 
"212992" + ], + [ + 1739973888, + "212992" + ], + [ + 1739973918, + "212992" + ], + [ + 1739973948, + "212992" + ], + [ + 1739973978, + "212992" + ], + [ + 1739974008, + "212992" + ], + [ + 1739974038, + "212992" + ], + [ + 1739974068, + "212992" + ], + [ + 1739974098, + "212992" + ], + [ + 1739974128, + "212992" + ], + [ + 1739974158, + "212992" + ], + [ + 1739974188, + "212992" + ], + [ + 1739974218, + "212992" + ], + [ + 1739974248, + "212992" + ], + [ + 1739974278, + "212992" + ], + [ + 1739974308, + "212992" + ], + [ + 1739974338, + "212992" + ], + [ + 1739974368, + "212992" + ], + [ + 1739974398, + "212992" + ], + [ + 1739974428, + "212992" + ], + [ + 1739974458, + "212992" + ], + [ + 1739974488, + "212992" + ], + [ + 1739974518, + "212992" + ], + [ + 1739974548, + "212992" + ], + [ + 1739974578, + "212992" + ], + [ + 1739974608, + "212992" + ], + [ + 1739974638, + "212992" + ], + [ + 1739974668, + "212992" + ], + [ + 1739974698, + "212992" + ], + [ + 1739974728, + "212992" + ], + [ + 1739974758, + "212992" + ], + [ + 1739974788, + "212992" + ], + [ + 1739974818, + "212992" + ], + [ + 1739974848, + "212992" + ], + [ + 1739974878, + "212992" + ], + [ + 1739974908, + "212992" + ], + [ + 1739974938, + "212992" + ], + [ + 1739974968, + "212992" + ], + [ + 1739974998, + "212992" + ], + [ + 1739975028, + "212992" + ], + [ + 1739975058, + "212992" + ], + [ + 1739975088, + "212992" + ], + [ + 1739975118, + "212992" + ], + [ + 1739975148, + "212992" + ], + [ + 1739975178, + "212992" + ], + [ + 1739975208, + "212992" + ], + [ + 1739975238, + "212992" + ] + ] + } + ] + }, + "random_key": "vwJA", + "tool_name": "execute_prometheus_range_query", + "description": "Memory usage for robusta-holmes pod", + "query": "container_memory_working_set_bytes{pod=\"robusta-holmes-7956c6bbc7-v5krl\",namespace=\"default\"}", + "start": "1739973438", + "end": "1739975238", + "step": 30 +} diff --git 
a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/get_current_time.txt new file mode 100644 index 00000000..ae993764 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/get_current_time.txt @@ -0,0 +1,2 @@ +{"toolset_name":"datetime","tool_name":"get_current_time","match_params":{}} +The current UTC date and time are 2025-02-19 14:27:18.072109+00:00. The current UTC timestamp in seconds is 1739975238. diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/kubectl_find_resource.txt new file mode 100644 index 00000000..2c0376d6 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/kubectl_find_resource.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"robusta-holmes"}} +stdout: +default robusta-holmes-7956c6bbc7-v5krl 1/1 Running 0 53m 10.244.0.97 grafana-cloud-control-plane app=holmes,pod-template-hash=7956c6bbc7 + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/list_available_metrics.txt new file mode 100644 index 00000000..6b2b8463 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/list_available_metrics.txt @@ -0,0 +1,4 @@ +{"toolset_name":"prometheus/metrics","tool_name":"list_available_metrics","match_params":{"name_filter":"container_memory_working_set_bytes"}} +Metric | Description | Type | Labels +---------------------------------------------------------------------------------------------------- +container_memory_working_set_bytes | Current working set in bytes. 
| gauge | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml new file mode 100644 index 00000000..e8e5fae8 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml @@ -0,0 +1,5 @@ +user_prompt: "Show me a graph of the memory usage for the pod robusta-holmes over the last 30 minutes" +expected_output: + - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "vwJA" } >>. Both tool_name and random_key MUST absolutely be present and the same as mentioned here.' +evaluation: + correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt new file mode 100644 index 00000000..1d03d9e6 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt @@ -0,0 +1,267 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"sum(rate(container_cpu_usage_seconds_total{pod=\"robusta-holmes-7956c6bbc7-v5krl\"}[5m])) by (pod)","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "pod": "robusta-holmes-7956c6bbc7-v5krl" + }, + "values": [ + [ + 1739974068, + "0.002144419836653223" + ], + [ + 1739974098, + "0.002099330660706121" + ], + [ + 1739974128, + "0.0021036509153405693" + ], + [ + 1739974158, + "0.0021460544685770254" + ], + [ + 1739974188, + "0.0021387821662440226" + ], + [ + 1739974218, + "0.002004565361662111" + ], + [ + 1739974248, + "0.0020237542373384224" + ], + [ + 1739974278, + 
"0.002004929155904469" + ], + [ + 1739974308, + "0.001951633799832392" + ], + [ + 1739974338, + "0.0018451832562585316" + ], + [ + 1739974368, + "0.0018120988130422558" + ], + [ + 1739974398, + "0.0018715505096282671" + ], + [ + 1739974428, + "0.0018647436497913313" + ], + [ + 1739974458, + "0.0018961485265557838" + ], + [ + 1739974488, + "0.0020138379880019113" + ], + [ + 1739974518, + "0.002031105182266877" + ], + [ + 1739974548, + "0.0020361105227313095" + ], + [ + 1739974578, + "0.0020592061839086193" + ], + [ + 1739974608, + "0.0020878273632490294" + ], + [ + 1739974638, + "0.0021487721818257426" + ], + [ + 1739974668, + "0.002108847216571825" + ], + [ + 1739974698, + "0.0021441882083414717" + ], + [ + 1739974728, + "0.0021377254052434578" + ], + [ + 1739974758, + "0.0020931879628660645" + ], + [ + 1739974788, + "0.0020756032582432953" + ], + [ + 1739974818, + "0.002074645673087472" + ], + [ + 1739974848, + "0.002033403487826402" + ], + [ + 1739974878, + "0.0019511100667039417" + ], + [ + 1739974908, + "0.00197596000785474" + ], + [ + 1739974938, + "0.0020125591978856364" + ], + [ + 1739974968, + "0.0019215618792556713" + ], + [ + 1739974998, + "0.0019096397099409163" + ], + [ + 1739975028, + "0.001929468931314625" + ], + [ + 1739975058, + "0.0019307202542465682" + ], + [ + 1739975088, + "0.0019097419540450142" + ], + [ + 1739975118, + "0.001974109446740776" + ], + [ + 1739975148, + "0.002029847480417659" + ], + [ + 1739975178, + "0.002065951835505558" + ], + [ + 1739975208, + "0.002063288358376739" + ], + [ + 1739975238, + "0.0021519573827565273" + ], + [ + 1739975268, + "0.0022900053599633225" + ], + [ + 1739975298, + "0.002249468266972769" + ], + [ + 1739975328, + "0.002384692159377845" + ], + [ + 1739975358, + "0.0024260717354572586" + ], + [ + 1739975388, + "0.0025115599194239623" + ], + [ + 1739975418, + "0.002497366767008242" + ], + [ + 1739975448, + "0.002487755825493165" + ], + [ + 1739975478, + "0.0025284455334128406" + ], + [ + 1739975508, + 
"0.0025086424254349496" + ], + [ + 1739975538, + "0.0024484717650784748" + ], + [ + 1739975568, + "0.0023836217846226063" + ], + [ + 1739975598, + "0.0025764565105480873" + ], + [ + 1739975628, + "0.0026345982933064973" + ], + [ + 1739975658, + "0.0026226150739384964" + ], + [ + 1739975688, + "0.002701519318107138" + ], + [ + 1739975718, + "0.002782986320627817" + ], + [ + 1739975748, + "0.0027171950968434386" + ], + [ + 1739975778, + "0.0027186090281877775" + ], + [ + 1739975808, + "0.00273863781620118" + ], + [ + 1739975838, + "0.0027098064769902393" + ], + [ + 1739975868, + "0.002603972369516789" + ] + ] + } + ] + }, + "random_key": "HBGf", + "tool_name": "execute_prometheus_range_query", + "description": "CPU usage for robusta-holmes pod", + "query": "sum(rate(container_cpu_usage_seconds_total{pod=\"robusta-holmes-7956c6bbc7-v5krl\"}[5m])) by (pod)", + "start": "1739974068", + "end": "1739975868", + "step": 30 +} diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt new file mode 100644 index 00000000..9d0fc3b9 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt @@ -0,0 +1,267 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"sum(rate(container_cpu_usage_seconds_total{pod=\"robusta-holmes-7956c6bbc7-v5krl\", namespace=\"default\"}[5m])) by (pod)","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "pod": "robusta-holmes-7956c6bbc7-v5krl" + }, + "values": [ + [ + 1739974068, + "0.002144419836653223" + ], + [ + 1739974098, + "0.002099330660706121" + ], + [ + 1739974128, + "0.0021036509153405693" + ], + [ + 1739974158, + 
"0.0021460544685770254" + ], + [ + 1739974188, + "0.0021387821662440226" + ], + [ + 1739974218, + "0.002004565361662111" + ], + [ + 1739974248, + "0.0020237542373384224" + ], + [ + 1739974278, + "0.002004929155904469" + ], + [ + 1739974308, + "0.001951633799832392" + ], + [ + 1739974338, + "0.0018451832562585316" + ], + [ + 1739974368, + "0.0018120988130422558" + ], + [ + 1739974398, + "0.0018715505096282671" + ], + [ + 1739974428, + "0.0018647436497913313" + ], + [ + 1739974458, + "0.0018961485265557838" + ], + [ + 1739974488, + "0.0020138379880019113" + ], + [ + 1739974518, + "0.002031105182266877" + ], + [ + 1739974548, + "0.0020361105227313095" + ], + [ + 1739974578, + "0.0020592061839086193" + ], + [ + 1739974608, + "0.0020878273632490294" + ], + [ + 1739974638, + "0.0021487721818257426" + ], + [ + 1739974668, + "0.002108847216571825" + ], + [ + 1739974698, + "0.0021441882083414717" + ], + [ + 1739974728, + "0.0021377254052434578" + ], + [ + 1739974758, + "0.0020931879628660645" + ], + [ + 1739974788, + "0.0020756032582432953" + ], + [ + 1739974818, + "0.002074645673087472" + ], + [ + 1739974848, + "0.002033403487826402" + ], + [ + 1739974878, + "0.0019511100667039417" + ], + [ + 1739974908, + "0.00197596000785474" + ], + [ + 1739974938, + "0.0020125591978856364" + ], + [ + 1739974968, + "0.0019215618792556713" + ], + [ + 1739974998, + "0.0019096397099409163" + ], + [ + 1739975028, + "0.001929468931314625" + ], + [ + 1739975058, + "0.0019307202542465682" + ], + [ + 1739975088, + "0.0019097419540450142" + ], + [ + 1739975118, + "0.001974109446740776" + ], + [ + 1739975148, + "0.002029847480417659" + ], + [ + 1739975178, + "0.002065951835505558" + ], + [ + 1739975208, + "0.002063288358376739" + ], + [ + 1739975238, + "0.0021519573827565273" + ], + [ + 1739975268, + "0.0022900053599633225" + ], + [ + 1739975298, + "0.002249468266972769" + ], + [ + 1739975328, + "0.002384692159377845" + ], + [ + 1739975358, + "0.0024260717354572586" + ], + [ + 1739975388, + 
"0.0025115599194239623" + ], + [ + 1739975418, + "0.002497366767008242" + ], + [ + 1739975448, + "0.002487755825493165" + ], + [ + 1739975478, + "0.0025284455334128406" + ], + [ + 1739975508, + "0.0025086424254349496" + ], + [ + 1739975538, + "0.0024484717650784748" + ], + [ + 1739975568, + "0.0023836217846226063" + ], + [ + 1739975598, + "0.0025764565105480873" + ], + [ + 1739975628, + "0.0026345982933064973" + ], + [ + 1739975658, + "0.0026226150739384964" + ], + [ + 1739975688, + "0.002701519318107138" + ], + [ + 1739975718, + "0.002782986320627817" + ], + [ + 1739975748, + "0.0027171950968434386" + ], + [ + 1739975778, + "0.0027186090281877775" + ], + [ + 1739975808, + "0.00273863781620118" + ], + [ + 1739975838, + "0.0027098064769902393" + ], + [ + 1739975868, + "0.002603972369516789" + ] + ] + } + ] + }, + "random_key": "Rs0H", + "tool_name": "execute_prometheus_range_query", + "description": "CPU usage for robusta-holmes pod", + "query": "sum(rate(container_cpu_usage_seconds_total{pod=\"robusta-holmes-7956c6bbc7-v5krl\", namespace=\"default\"}[5m])) by (pod)", + "start": "1739974068", + "end": "1739975868", + "step": 30 +} diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/get_current_time.txt new file mode 100644 index 00000000..6c61f1ae --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/get_current_time.txt @@ -0,0 +1,2 @@ +{"toolset_name":"datetime","tool_name":"get_current_time","match_params":{}} +The current UTC date and time are 2025-02-19 14:37:48.967300+00:00. The current UTC timestamp in seconds is 1739975868. 
diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/kubectl_find_resource.txt new file mode 100644 index 00000000..2663bfa4 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/kubectl_find_resource.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"robusta-holmes"}} +stdout: +default robusta-holmes-7956c6bbc7-v5krl 1/1 Running 0 63m 10.244.0.97 grafana-cloud-control-plane app=holmes,pod-template-hash=7956c6bbc7 + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/list_available_metrics.txt new file mode 100644 index 00000000..af3b0c12 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/list_available_metrics.txt @@ -0,0 +1,4 @@ +{"toolset_name":"prometheus/metrics","tool_name":"list_available_metrics","match_params":{"name_filter":"container_cpu_usage_seconds_total"}} +Metric | Description | Type | Labels +---------------------------------------------------------------------------------------------------- +container_cpu_usage_seconds_total | Cumulative cpu time consumed in seconds. 
| counter | container, cpu, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/test_case.yaml new file mode 100644 index 00000000..b4e1e221 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/test_case.yaml @@ -0,0 +1,5 @@ +user_prompt: "Show me a graph of the CPU usage for the pod robusta-holmes over the last 30 minutes" +expected_output: + - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "HBGf" } >>. Both tool_name and random_key MUST absolutely be present and either the same as mentioned here or Rs0H.' +evaluation: + correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query.txt new file mode 100644 index 00000000..edd3eab9 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query.txt @@ -0,0 +1,125 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\"})","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": {}, + "values": [ + [ + 1739535484, + "20721664" + ], + [ + 1739535784, + "114737152" + ], + [ + 1739536084, + "283242496" + ], + [ + 1739536384, + "470319104" + ], + [ + 1739536684, + "658276352" + ], + [ + 1739536984, + "852058112" + ], + [ + 1739537284, + "1040011264" + ], + [ + 1739537584, + "1225826304" + ], + [ + 1739537884, + "1417961472" + ], + [ + 1739538184, + "1603776512" + ], + [ + 
1739538484, + "1796292608" + ], + [ + 1739538784, + "1984634880" + ], + [ + 1739539084, + "2079096832" + ], + [ + 1739539384, + "162766848" + ], + [ + 1739539684, + "352763904" + ], + [ + 1739539984, + "539447296" + ], + [ + 1739540284, + "731578368" + ], + [ + 1739540584, + "916525056" + ], + [ + 1739540884, + "1104474112" + ], + [ + 1739541184, + "1299525632" + ], + [ + 1739541484, + "1486602240" + ], + [ + 1739541784, + "1677078528" + ], + [ + 1739542084, + "1866686464" + ], + [ + 1739542384, + "2055421952" + ], + [ + 1739542684, + "2091687936" + ], + [ + 1739542984, + "230117376" + ] + ] + } + ] + }, + "random_key": "of68", + "tool_name": "execute_prometheus_range_query", + "description": "Memory usage for analytics-exporter-slow pod", + "query": "max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\"})", + "start": "1739456584", + "end": "1739542984", + "step": 300 +} diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_by_pod.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_by_pod.txt new file mode 100644 index 00000000..8089127f --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_by_pod.txt @@ -0,0 +1,127 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\"}) by (pod)","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "pod": "analytics-exporter-slow-684486cfb7-2b6lf" + }, + "values": [ + [ + 1739535484, + "20721664" + ], + [ + 1739535784, + "114737152" + ], + [ + 1739536084, + "283242496" + ], + [ + 1739536384, + "470319104" + ], + [ + 1739536684, + "658276352" + ], + [ + 1739536984, + 
"852058112" + ], + [ + 1739537284, + "1040011264" + ], + [ + 1739537584, + "1225826304" + ], + [ + 1739537884, + "1417961472" + ], + [ + 1739538184, + "1603776512" + ], + [ + 1739538484, + "1796292608" + ], + [ + 1739538784, + "1984634880" + ], + [ + 1739539084, + "2079096832" + ], + [ + 1739539384, + "162766848" + ], + [ + 1739539684, + "352763904" + ], + [ + 1739539984, + "539447296" + ], + [ + 1739540284, + "731578368" + ], + [ + 1739540584, + "916525056" + ], + [ + 1739540884, + "1104474112" + ], + [ + 1739541184, + "1299525632" + ], + [ + 1739541484, + "1486602240" + ], + [ + 1739541784, + "1677078528" + ], + [ + 1739542084, + "1866686464" + ], + [ + 1739542384, + "2055421952" + ], + [ + 1739542684, + "2091687936" + ], + [ + 1739542984, + "230117376" + ] + ] + } + ] + }, + "random_key": "W3w4", + "tool_name": "execute_prometheus_range_query", + "description": "Memory usage for analytics-exporter-slow pod over time", + "query": "max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\"}) by (pod)", + "start": "1739456584", + "end": "1739542984", + "step": 300 +} diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace.txt new file mode 100644 index 00000000..bd6debc6 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace.txt @@ -0,0 +1,125 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\",namespace=\"default\"})","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": {}, + "values": [ + [ + 1739535484, + "20721664" + ], 
+ [ + 1739535784, + "114737152" + ], + [ + 1739536084, + "283242496" + ], + [ + 1739536384, + "470319104" + ], + [ + 1739536684, + "658276352" + ], + [ + 1739536984, + "852058112" + ], + [ + 1739537284, + "1040011264" + ], + [ + 1739537584, + "1225826304" + ], + [ + 1739537884, + "1417961472" + ], + [ + 1739538184, + "1603776512" + ], + [ + 1739538484, + "1796292608" + ], + [ + 1739538784, + "1984634880" + ], + [ + 1739539084, + "2079096832" + ], + [ + 1739539384, + "162766848" + ], + [ + 1739539684, + "352763904" + ], + [ + 1739539984, + "539447296" + ], + [ + 1739540284, + "731578368" + ], + [ + 1739540584, + "916525056" + ], + [ + 1739540884, + "1104474112" + ], + [ + 1739541184, + "1299525632" + ], + [ + 1739541484, + "1486602240" + ], + [ + 1739541784, + "1677078528" + ], + [ + 1739542084, + "1866686464" + ], + [ + 1739542384, + "2055421952" + ], + [ + 1739542684, + "2091687936" + ], + [ + 1739542984, + "230117376" + ] + ] + } + ] + }, + "random_key": "envg", + "tool_name": "execute_prometheus_range_query", + "description": "Memory usage for analytics-exporter-slow pod", + "query": "max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\",namespace=\"default\"})", + "start": "1739456584", + "end": "1739542984", + "step": 300 +} diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace_by_pod.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace_by_pod.txt new file mode 100644 index 00000000..736767f8 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace_by_pod.txt @@ -0,0 +1,267 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\",namespace=\"default\"}) by 
(pod)","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "pod": "analytics-exporter-slow-684486cfb7-2b6lf" + }, + "values": [ + [ + 1739539384, + "162766848" + ], + [ + 1739539444, + "200687616" + ], + [ + 1739539504, + "236470272" + ], + [ + 1739539564, + "272736256" + ], + [ + 1739539624, + "313577472" + ], + [ + 1739539684, + "352763904" + ], + [ + 1739539744, + "390684672" + ], + [ + 1739539804, + "424812544" + ], + [ + 1739539864, + "466132992" + ], + [ + 1739539924, + "501526528" + ], + [ + 1739539984, + "539447296" + ], + [ + 1739540044, + "579895296" + ], + [ + 1739540104, + "618254336" + ], + [ + 1739540164, + "653602816" + ], + [ + 1739540224, + "688996352" + ], + [ + 1739540284, + "731578368" + ], + [ + 1739540344, + "768630784" + ], + [ + 1739540404, + "802762752" + ], + [ + 1739540464, + "844083200" + ], + [ + 1739540524, + "882003968" + ], + [ + 1739540584, + "916525056" + ], + [ + 1739540644, + "959504384" + ], + [ + 1739540704, + "995766272" + ], + [ + 1739540764, + "1031159808" + ], + [ + 1739540824, + "1070784512" + ], + [ + 1739540884, + "1104474112" + ], + [ + 1739540944, + "1147842560" + ], + [ + 1739541004, + "1182842880" + ], + [ + 1739541064, + "1222422528" + ], + [ + 1739541124, + "1259077632" + ], + [ + 1739541184, + "1299525632" + ], + [ + 1739541244, + "1335791616" + ], + [ + 1739541304, + "1371578368" + ], + [ + 1739541364, + "1410367488" + ], + [ + 1739541424, + "1452081152" + ], + [ + 1739541484, + "1486602240" + ], + [ + 1739541544, + "1525788672" + ], + [ + 1739541604, + "1564975104" + ], + [ + 1739541664, + "1600368640" + ], + [ + 1739541724, + "1637023744" + ], + [ + 1739541784, + "1677078528" + ], + [ + 1739541844, + "1715392512" + ], + [ + 1739541904, + "1753313280" + ], + [ + 1739541964, + "1789972480" + ], + [ + 1739542024, + "1826234368" + ], + [ + 1739542084, + "1866686464" + ], + [ + 1739542144, + "1903738880" + ], + [ + 
1739542204, + "1939132416" + ], + [ + 1739542264, + "1974525952" + ], + [ + 1739542324, + "2014973952" + ], + [ + 1739542384, + "2055421952" + ], + [ + 1739542444, + "2092081152" + ], + [ + 1739542504, + "2091687936" + ], + [ + 1739542564, + "2091687936" + ], + [ + 1739542624, + "2091687936" + ], + [ + 1739542684, + "2091687936" + ], + [ + 1739542744, + "105877504" + ], + [ + 1739542804, + "127098880" + ], + [ + 1739542864, + "155176960" + ], + [ + 1739542924, + "194359296" + ], + [ + 1739542984, + "230117376" + ] + ] + } + ] + }, + "random_key": "UEMD", + "tool_name": "execute_prometheus_range_query", + "description": "Memory usage for analytics-exporter-slow deployment", + "query": "max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\", namespace=\"default\"}) by (pod)", + "start": "2025-02-14T13:23:04Z", + "end": "2025-02-14T14:23:04Z", + "step": 60 +} diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/get_current_time.txt new file mode 100644 index 00000000..d9ab289d --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/get_current_time.txt @@ -0,0 +1,2 @@ +{"toolset_name":"datetime","tool_name":"get_current_time","match_params":{}} +The current UTC date and time are 2025-02-14 14:23:04.516591+00:00. The current UTC timestamp in seconds is 1739542984. 
diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_find_resource.txt new file mode 100644 index 00000000..0bddc820 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_find_resource.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"deployment","keyword":"analytics-exporter-slow"}} +stdout: +default analytics-exporter-slow 1/1 1 1 4d6h analytics-exporter-slow us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0 app=analytics-exporter-slow + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_get_by_name.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_get_by_name.txt new file mode 100644 index 00000000..df2d6c43 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_get_by_name.txt @@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get_by_name","match_params":{"kind":"deployment","name":"analytics-exporter-slow","namespace":"default"}} +stdout: +NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR LABELS +analytics-exporter-slow 1/1 1 1 4d6h analytics-exporter-slow us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0 app=analytics-exporter-slow + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_lineage_children.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_lineage_children.txt new file mode 100644 index 00000000..7c791057 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_lineage_children.txt @@ -0,0 +1,23 @@ 
+{"toolset_name":"kubernetes/kube-lineage-extras","tool_name":"kubectl_lineage_children","match_params":{"kind":"deployment","name":"analytics-exporter-slow","namespace":"default"}} +stdout: +NAME READY STATUS AGE +Deployment/analytics-exporter-slow 1/1 4d6h +├── ReplicaSet/analytics-exporter-slow-55644bc6b5 0/0 3h1m +├── ReplicaSet/analytics-exporter-slow-55b58fff76 0/0 3h10m +├── ReplicaSet/analytics-exporter-slow-65c98f9d5c 0/0 4d5h +├── ReplicaSet/analytics-exporter-slow-66dc8844dc 0/0 4d6h +├── ReplicaSet/analytics-exporter-slow-684486cfb7 1/1 125m +│ └── Pod/analytics-exporter-slow-684486cfb7-2b6lf 1/1 Running 125m +│ ├── Event/analytics-exporter-slow-684486cfb7-2b6lf.18241249922da068 - Pulling: Pulling image "us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0" (x3) 125m +│ ├── Event/analytics-exporter-slow-684486cfb7-2b6lf.18241249ca3e91dc - Created: Created container analytics-exporter-slow (x3) 125m +│ ├── Event/analytics-exporter-slow-684486cfb7-2b6lf.18241249cf9d4695 - Started: Started container analytics-exporter-slow (x3) 125m +│ ├── Event/analytics-exporter-slow-684486cfb7-2b6lf.182418a2fa33e13f - Pulled: Successfully pulled image "us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0" in 1.076s (1.076s including waiting). Image size: 47485901 bytes. 
8m59s +│ └── Service/kubernetes - 66d +├── ReplicaSet/analytics-exporter-slow-6f4c8b64cf 0/0 142m +├── ReplicaSet/analytics-exporter-slow-767fd64696 0/0 3h21m +├── ReplicaSet/analytics-exporter-slow-7b74f69c65 0/0 130m +├── ReplicaSet/analytics-exporter-slow-7b98fb7847 0/0 4d5h +├── ReplicaSet/analytics-exporter-slow-7d596d8689 0/0 4d6h +└── ReplicaSet/analytics-exporter-slow-9ccc96975 0/0 3h3m + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_top_pods.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_top_pods.txt new file mode 100644 index 00000000..a29a1535 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_top_pods.txt @@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_top_pods","match_params":{}} +Command `kubectl top pods -A` failed with return code 1 +stdout: + +stderr: +error: Metrics API not available diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/list_available_metrics.txt new file mode 100644 index 00000000..6b2b8463 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/list_available_metrics.txt @@ -0,0 +1,4 @@ +{"toolset_name":"prometheus/metrics","tool_name":"list_available_metrics","match_params":{"name_filter":"container_memory_working_set_bytes"}} +Metric | Description | Type | Labels +---------------------------------------------------------------------------------------------------- +container_memory_working_set_bytes | Current working set in bytes. 
| gauge | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/slow_oom_deployment.yaml b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/slow_oom_deployment.yaml new file mode 100644 index 00000000..1e21c191 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/slow_oom_deployment.yaml @@ -0,0 +1,32 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: analytics-exporter-slow +spec: + replicas: 1 + selector: + matchLabels: + app: analytics-exporter-slow + template: + metadata: + labels: + app: analytics-exporter-slow + spec: + containers: + - name: analytics-exporter-slow + image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0 + imagePullPolicy: Always + args: + - 10Mi + - "0" + - 3000Mi + - "5000" + - "2" + resources: + limits: + memory: 2000Mi + requests: + memory: 2000Mi + restartPolicy: Always + nodeSelector: + kubernetes.io/arch: amd64 diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/test_case.yaml new file mode 100644 index 00000000..52a76359 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/test_case.yaml @@ -0,0 +1,13 @@ +user_prompt: "Show me the memory usage over time for the `analytics-exporter-slow` deployment. Does it indicate a memory leak?" +expected_output: + - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "XXX" } >>.' + - "Yes, the memory profile indicates a memory leak" +# These are slow going. It takes an hour to get one pod killed. This is to make sure the mem leak is slow enough to look like a mem leak for the LLM to see it as such. 
+# Running with RUN_LIVE will potentially fail the test which is expected. Run the deployment for a couple of hours before running this test live. +before_test: | + kubectl apply -f ./slow_oom_deployment.yaml + sleep 300 +after_test: | + kubectl delete -f ./slow_oom_deployment.yaml +evaluation: + correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/execute_prometheus_range_query.txt new file mode 100644 index 00000000..68ba224d --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/execute_prometheus_range_query.txt @@ -0,0 +1,231 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"sum(rate(container_cpu_cfs_throttled_periods_total{pod=\"login-app-58995d8584-pbv8p\"}[5m])) by (pod)","description":"*","start":"1739537300","end":"1739540900","step":60}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "pod": "login-app-58995d8584-pbv8p" + }, + "values": [ + [ + 1739537840, + "3.4072032186026733" + ], + [ + 1739537900, + "5.406597523823652" + ], + [ + 1739537960, + "7.408753290644883" + ], + [ + 1739538020, + "9.404728838224925" + ], + [ + 1739538080, + "9.995487112162047" + ], + [ + 1739538140, + "8.63625" + ], + [ + 1739538200, + "9.995438074644968" + ], + [ + 1739538260, + "10.001557528424893" + ], + [ + 1739538320, + "10.00137849975695" + ], + [ + 1739538380, + "9.999286569865461" + ], + [ + 1739538440, + "9.99430339128491" + ], + [ + 1739538500, + "9.99486313935569" + ], + [ + 1739538560, + "9.996669024548167" + ], + [ + 1739538620, + "9.994355347478168" + ], + [ + 1739538680, + "9.99736217356898" + ], + [ + 1739538740, + "9.999829470848043" + ], + [ + 1739538800, + "10.001075783374326" + ], + [ + 1739538860, + "9.36550466405067" + ], + [ + 1739538920, + "9.348420425061777" + ], + [ + 1739538980, + 
"10.001689358078405" + ], + [ + 1739539040, + "9.260918713816103" + ], + [ + 1739539100, + "9.984031632575467" + ], + [ + 1739539160, + "9.98251670543612" + ], + [ + 1739539220, + "9.982983972239353" + ], + [ + 1739539280, + "9.251973814906151" + ], + [ + 1739539340, + "10.002337569665729" + ], + [ + 1739539400, + "9.279420970701942" + ], + [ + 1739539460, + "9.373990551064944" + ], + [ + 1739539520, + "10.000871852250107" + ], + [ + 1739539580, + "9.998340371388002" + ], + [ + 1739539640, + "10.00240247150026" + ], + [ + 1739539700, + "9.998464655240205" + ], + [ + 1739539760, + "10.002335762023522" + ], + [ + 1739539820, + "9.999705891003204" + ], + [ + 1739539880, + "9.993962889957304" + ], + [ + 1739539940, + "9.992202616455367" + ], + [ + 1739540000, + "9.986677436612922" + ], + [ + 1739540060, + "9.990708709470603" + ], + [ + 1739540120, + "9.990659139828615" + ], + [ + 1739540180, + "9.999596494638883" + ], + [ + 1739540240, + "9.998636884433589" + ], + [ + 1739540300, + "9.989851698991583" + ], + [ + 1739540360, + "9.99259125473183" + ], + [ + 1739540420, + "9.387131423731198" + ], + [ + 1739540480, + "9.993471254520895" + ], + [ + 1739540540, + "9.999655843584732" + ], + [ + 1739540600, + "9.997547981542988" + ], + [ + 1739540660, + "9.349657281442829" + ], + [ + 1739540720, + "9.997990544183383" + ], + [ + 1739540780, + "10.00300465721869" + ], + [ + 1739540840, + "10.001487763148106" + ], + [ + 1739540900, + "9.28204958504213" + ] + ] + } + ] + }, + "random_key": "KmFa", + "tool_name": "execute_prometheus_range_query", + "description": "CPU throttling for login-app pod", + "query": "sum(rate(container_cpu_cfs_throttled_periods_total{pod=\"login-app-58995d8584-pbv8p\"}[5m])) by (pod)", + "start": "1739537300", + "end": "1739540900", + "step": 60 +} diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/get_current_time.txt new file mode 100644 index 
00000000..1b1fcfe5 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/get_current_time.txt @@ -0,0 +1,2 @@ +{"toolset_name":"datetime","tool_name":"get_current_time","match_params":{}} +The current UTC date and time are 2025-02-14 13:48:20.037094+00:00. The current UTC timestamp in seconds is 1739540900. diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/Dockerfile b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/Dockerfile new file mode 100644 index 00000000..f932d018 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.10-slim + +# Set working directory +WORKDIR /app + +# Copy requirements.txt +COPY requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the FastAPI app +COPY . . + +# Expose the ports +EXPOSE 8000 8001 + +# Run the FastAPI app +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/app.py b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/app.py new file mode 100644 index 00000000..a66da5bd --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/app.py @@ -0,0 +1,54 @@ +# ruff: noqa: F821 +import logging +import time +from fastapi import FastAPI +from fastapi.responses import JSONResponse +from prometheus_fastapi_instrumentator import Instrumentator +import bcrypt +import json + +app = FastAPI() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +Instrumentator().instrument(app).expose(app) + + +def verify_password(): + logger.info( + "Connecting to promotions database to see if we should try to upsell user" + ) + try: + start_time = time.time() + logger.info("Verify password") + + password = b"test_password" + salt = bcrypt.gensalt(rounds=15) + bcrypt.hashpw(password, salt) + + end_time = 
time.time() + logger.info( + f"Password verification completed in {end_time - start_time:.2f} seconds." + ) + + return True + except Exception as e: + logger.error(f"Error checking for password: {e}") + return False + + +@app.get("/", response_class=JSONResponse) +def read_root(): + logger.info("Received request for checkout page.") + start_time = time.time() + is_valid = verify_password() + end_time = time.time() + logger.info(f"Page rendered in {end_time - start_time:.2f} seconds.") + return json.dumps({"valid": is_valid}) + + +if __name__ == "__main__": + # Start Prometheus metrics server + start_http_server(8001) + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/build.sh b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/build.sh new file mode 100755 index 00000000..b1349366 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/build.sh @@ -0,0 +1 @@ +docker buildx build --platform linux/amd64 . 
-t us-central1-docker.pkg.dev/genuine-flight-317411/devel/cpu-throttling-demo diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/manifest.yaml b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/manifest.yaml new file mode 100644 index 00000000..7ad2a2c4 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/manifest.yaml @@ -0,0 +1,100 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: login-app +spec: + replicas: 1 + selector: + matchLabels: + app: login-app + template: + metadata: + labels: + app: login-app + spec: + containers: + - name: login-app + image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/cpu-throttling-demo + ports: + - containerPort: 8000 + - containerPort: 8001 + resources: + requests: + cpu: "400m" + limits: + cpu: "800m" + - name: curl-sidecar-1 + image: curlimages/curl + args: + - /bin/sh + - -c + - while true; do curl -s http://localhost:8000; sleep 1; done + - name: curl-sidecar-2 + image: curlimages/curl + args: + - /bin/sh + - -c + - while true; do curl -s http://localhost:8000; sleep 1; done +--- +apiVersion: v1 +kind: Service +metadata: + name: login-app-service + labels: + app: login-app +spec: + selector: + app: login-app + ports: + - protocol: TCP + port: 80 + targetPort: 8000 + name: http + type: ClusterIP +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: login-app-service-monitor + labels: + release: robusta +spec: + selector: + matchLabels: + app: login-app + endpoints: + - port: http + path: /metrics + interval: 5s + namespaceSelector: + matchNames: + - default +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: login-app-alert-rules + labels: + release: robusta +spec: + groups: + - name: loginapp.rules + rules: + - alert: LoginAppCPUThrottling + expr: | + (rate(container_cpu_cfs_throttled_seconds_total{container="login-app"}[5m]) > 0) + for: 1m + labels: + severity: warning + 
annotations: + summary: "Login App CPU Throttling Detected" + description: "Container {{ $labels.container }} in pod {{ $labels.pod }} has been CPU throttled for the last 5 minutes. This might impact application performance." + - alert: LoginAppHighCPUThrottling + expr: | + (rate(container_cpu_cfs_throttled_seconds_total{container="login-app"}[5m]) > 0.1) + for: 5m + labels: + severity: critical + annotations: + summary: "Login App Severe CPU Throttling" + description: "Container {{ $labels.container }} in pod {{ $labels.pod }} is experiencing severe CPU throttling (>10% of CPU time). Immediate attention required." diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/requirements.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/requirements.txt new file mode 100644 index 00000000..1e7a4b90 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/requirements.txt @@ -0,0 +1,5 @@ +bcrypt +fastapi +uvicorn[standard] +pymysql +prometheus-fastapi-instrumentator diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_find_resource.txt new file mode 100644 index 00000000..33bd5eb7 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_find_resource.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"login-app"}} +stdout: +default login-app-58995d8584-pbv8p 3/3 Running 0 52m 10.244.0.105 grafana-cloud-control-plane app=login-app,pod-template-hash=58995d8584 + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_top_pods.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_top_pods.txt new file mode 100644 index 00000000..a29a1535 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_top_pods.txt 
@@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_top_pods","match_params":{}} +Command `kubectl top pods -A` failed with return code 1 +stdout: + +stderr: +error: Metrics API not available diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/list_available_metrics.txt new file mode 100644 index 00000000..3deb9867 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/list_available_metrics.txt @@ -0,0 +1,4 @@ +{"toolset_name":"prometheus/metrics","tool_name":"list_available_metrics","match_params":{"name_filter":"container_cpu_cfs_throttled_periods_total"}} +Metric | Description | Type | Labels +---------------------------------------------------------------------------------------------------- +container_cpu_cfs_throttled_periods_total | Number of throttled period intervals. | counter | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/test_case.yaml new file mode 100644 index 00000000..78faf42e --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/test_case.yaml @@ -0,0 +1,7 @@ +user_prompt: "Is the login-app being CPU throttled? Show me." +expected_output: + - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "XXX" } >>.' 
+before_test: kubectl apply -f ./helm/manifest.yaml +after_test: kubectl delete -f ./helm/manifest.yaml +evaluation: + correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query.txt new file mode 100644 index 00000000..45b22f66 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query.txt @@ -0,0 +1,1100 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"http_request_duration_seconds_sum / http_request_duration_seconds_count","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "container": "fastapi-app", + "endpoint": "http", + "handler": "/", + "instance": "10.244.0.19:8000", + "job": "customer-orders-service", + "method": "GET", + "namespace": "default", + "pod": "customer-orders-6f5cbdf85-c5fsf", + "service": "customer-orders-service" + }, + "values": [ + [ + 1739773059, + "7.2587948227500085" + ], + [ + 1739773359, + "8.23126970688889" + ], + [ + 1739773659, + "7.9317453926923145" + ], + [ + 1739773959, + "8.008496379882345" + ], + [ + 1739774259, + "8.053626901636362" + ], + [ + 1739774559, + "7.815565156692309" + ], + [ + 1739774859, + "7.555963071935487" + ], + [ + 1739775159, + "7.5790614398285765" + ], + [ + 1739775459, + "7.732811918500014" + ], + [ + 1739775759, + "7.689560054227289" + ], + [ + 1739776059, + "7.660539889428578" + ], + [ + 1739776359, + "7.630091538603779" + ], + [ + 1739776659, + "7.656471271473686" + ], + [ + 1739776959, + "7.636309512241953" + ], + [ + 1739777259, + "7.583012037212128" + ], + [ + 1739777559, + "7.500178550943661" + ], + [ + 1739777859, + "7.500630043146668" + ], + [ + 1739778159, + "7.557407538824999" + ], + [ + 1739778459, + "7.638421752964276" + ], + [ + 
1739778759, + "7.625372610382013" + ], + [ + 1739779059, + "7.566496483698924" + ], + [ + 1739779359, + "7.548145681469384" + ], + [ + 1739779659, + "7.5465564971274475" + ], + [ + 1739779959, + "7.601751123707535" + ], + [ + 1739780259, + "7.565906024747731" + ], + [ + 1739780559, + "7.607407520895647" + ], + [ + 1739780859, + "7.574012064941668" + ], + [ + 1739781159, + "7.539496465258071" + ], + [ + 1739781459, + "7.573159430651165" + ], + [ + 1739781759, + "7.571160032458671" + ], + [ + 1739782059, + "7.5619592621898155" + ], + [ + 1739782359, + "7.570613084725389" + ], + [ + 1739782659, + "7.568860758157557" + ], + [ + 1739782959, + "7.543620602437094" + ], + [ + 1739783259, + "7.5233010567161465" + ], + [ + 1739783559, + "7.494653588650015" + ], + [ + 1739783859, + "7.464420700451223" + ], + [ + 1739784159, + "7.427150739071009" + ], + [ + 1739784459, + "7.463726934161848" + ], + [ + 1739784759, + "7.49586881620787" + ], + [ + 1739785059, + "7.474120202379134" + ], + [ + 1739785359, + "7.450943975796812" + ], + [ + 1739785659, + "7.415401043261803" + ], + [ + 1739785959, + "7.445837658071445" + ], + [ + 1739786259, + "7.467061023510009" + ], + [ + 1739786559, + "7.472762682303937" + ], + [ + 1739786859, + "7.442438846483263" + ], + [ + 1739787159, + "7.453061777225362" + ], + [ + 1739787459, + "7.410695965944959" + ], + [ + 1739787759, + "7.430464756954955" + ], + [ + 1739788059, + "7.4475660274229005" + ], + [ + 1739788359, + "7.448570901545438" + ], + [ + 1739788659, + "7.430730616487269" + ], + [ + 1739788959, + "7.4528477457374835" + ], + [ + 1739789259, + "7.441440416938509" + ], + [ + 1739789559, + "7.448793543385554" + ], + [ + 1739789859, + "7.449726605861679" + ], + [ + 1739790159, + "7.460546300430233" + ], + [ + 1739790459, + "7.4650803777862516" + ], + [ + 1739790759, + "7.45649466648687" + ], + [ + 1739791059, + "7.457236310977847" + ], + [ + 1739791359, + "7.461593859447282" + ], + [ + 1739791659, + "7.4641863409714295" + ], + [ + 1739791959, + 
"7.457733998193641" + ] + ] + }, + { + "metric": { + "container": "fastapi-app", + "endpoint": "http", + "handler": "/metrics", + "instance": "10.244.0.19:8000", + "job": "customer-orders-service", + "method": "GET", + "namespace": "default", + "pod": "customer-orders-6f5cbdf85-c5fsf", + "service": "customer-orders-service" + }, + "values": [ + [ + 1739773059, + "0.0011778313823494185" + ], + [ + 1739773359, + "0.0012135840270238879" + ], + [ + 1739773659, + "0.0012112306929768024" + ], + [ + 1739773959, + "0.0012008533701252202" + ], + [ + 1739774259, + "0.001195991505147101" + ], + [ + 1739774559, + "0.0011950833803378216" + ], + [ + 1739774859, + "0.0011962475656896961" + ], + [ + 1739775159, + "0.0012018490413947748" + ], + [ + 1739775459, + "0.0012095391949127807" + ], + [ + 1739775759, + "0.0012050768477123908" + ], + [ + 1739776059, + "0.0012054137350186792" + ], + [ + 1739776359, + "0.001206090597041833" + ], + [ + 1739776659, + "0.001211775904666278" + ], + [ + 1739776959, + "0.0012099143664220488" + ], + [ + 1739777259, + "0.001209277887199517" + ], + [ + 1739777559, + "0.001206946253935319" + ], + [ + 1739777859, + "0.0012048928145299653" + ], + [ + 1739778159, + "0.0012041727450907188" + ], + [ + 1739778459, + "0.0012038800596742055" + ], + [ + 1739778759, + "0.0012027131334943577" + ], + [ + 1739779059, + "0.0012019704820075505" + ], + [ + 1739779359, + "0.001201262989698879" + ], + [ + 1739779659, + "0.001199501343542595" + ], + [ + 1739779959, + "0.0011993036855336462" + ], + [ + 1739780259, + "0.00119945177766277" + ], + [ + 1739780559, + "0.0012001496972885853" + ], + [ + 1739780859, + "0.001200470073553606" + ], + [ + 1739781159, + "0.0012024492333818396" + ], + [ + 1739781459, + "0.0012040872252957117" + ], + [ + 1739781759, + "0.0012049704045202848" + ], + [ + 1739782059, + "0.0012057259781239336" + ], + [ + 1739782359, + "0.0012063729497669208" + ], + [ + 1739782659, + "0.0012076399840166019" + ], + [ + 1739782959, + "0.0012078742991129643" + 
], + [ + 1739783259, + "0.0012052392969895296" + ], + [ + 1739783559, + "0.0012046767015325762" + ], + [ + 1739783859, + "0.0012071719776108071" + ], + [ + 1739784159, + "0.0012076973051504411" + ], + [ + 1739784459, + "0.0012083144446567524" + ], + [ + 1739784759, + "0.0012094431712657034" + ], + [ + 1739785059, + "0.001211459146878997" + ], + [ + 1739785359, + "0.0012136649074057143" + ], + [ + 1739785659, + "0.0012133093249669534" + ], + [ + 1739785959, + "0.0012123688916771459" + ], + [ + 1739786259, + "0.0012125048874028716" + ], + [ + 1739786559, + "0.0012116863064364715" + ], + [ + 1739786859, + "0.0012119315314876766" + ], + [ + 1739787159, + "0.0012129045611326296" + ], + [ + 1739787459, + "0.0012121163132036183" + ], + [ + 1739787759, + "0.0012125796785343247" + ], + [ + 1739788059, + "0.001213379090460214" + ], + [ + 1739788359, + "0.0012147431721285036" + ], + [ + 1739788659, + "0.0012140533344325655" + ], + [ + 1739788959, + "0.0012117559192166662" + ], + [ + 1739789259, + "0.0012106095432976303" + ], + [ + 1739789559, + "0.0012095006302576964" + ], + [ + 1739789859, + "0.001209972794638966" + ], + [ + 1739790159, + "0.0012095507696670438" + ], + [ + 1739790459, + "0.0012096573347496315" + ], + [ + 1739790759, + "0.0012104484912255531" + ], + [ + 1739791059, + "0.0012108398126455613" + ], + [ + 1739791359, + "0.0012106329894833428" + ], + [ + 1739791659, + "0.0012124586849537311" + ], + [ + 1739791959, + "0.001211919155042263" + ] + ] + }, + { + "metric": { + "container": "login-app", + "endpoint": "http", + "handler": "/", + "instance": "10.244.0.30:8000", + "job": "login-app-service", + "method": "GET", + "namespace": "default", + "pod": "login-app-58995d8584-pbv8p", + "service": "login-app-service" + }, + "values": [ + [ + 1739773059, + "2.83138598130303" + ], + [ + 1739773359, + "2.830102092584772" + ], + [ + 1739773659, + "2.8284908548359566" + ], + [ + 1739773959, + "2.8278310807242577" + ], + [ + 1739774259, + "2.828446740489451" + ], + [ + 
1739774559, + "2.828184324408743" + ], + [ + 1739774859, + "2.8279011449346414" + ], + [ + 1739775159, + "2.827529304447884" + ], + [ + 1739775459, + "2.827177012685198" + ], + [ + 1739775759, + "2.8271436622096044" + ], + [ + 1739776059, + "2.827210650151357" + ], + [ + 1739776359, + "2.8275159268187777" + ], + [ + 1739776659, + "2.827450344868665" + ], + [ + 1739776959, + "2.8274680421075318" + ], + [ + 1739777259, + "2.8275885253004827" + ], + [ + 1739777559, + "2.827507495578638" + ], + [ + 1739777859, + "2.827654852458657" + ], + [ + 1739778159, + "2.827741677818121" + ], + [ + 1739778459, + "2.8277080907233025" + ], + [ + 1739778759, + "2.8279315805660103" + ], + [ + 1739779059, + "2.828051686273458" + ], + [ + 1739779359, + "2.8291546968806" + ], + [ + 1739779659, + "2.8291437681633993" + ], + [ + 1739779959, + "2.829080858675334" + ], + [ + 1739780259, + "2.8290324227316668" + ], + [ + 1739780559, + "2.8289147258891867" + ], + [ + 1739780859, + "2.8288079988888084" + ], + [ + 1739781159, + "2.8287009978934834" + ], + [ + 1739781459, + "2.8288963259324067" + ], + [ + 1739781759, + "2.8292331362227507" + ], + [ + 1739782059, + "2.8294056472924414" + ], + [ + 1739782359, + "2.8293562156019734" + ], + [ + 1739782659, + "2.829205154885758" + ], + [ + 1739782959, + "2.8290954958162438" + ], + [ + 1739783259, + "2.8290240288020576" + ], + [ + 1739783559, + "2.828992801825756" + ], + [ + 1739783859, + "2.829000384073736" + ], + [ + 1739784159, + "2.8289273408123683" + ], + [ + 1739784459, + "2.8288087803547852" + ], + [ + 1739784759, + "2.828647169326283" + ], + [ + 1739785059, + "2.8285850495885825" + ], + [ + 1739785359, + "2.828510335493367" + ], + [ + 1739785659, + "2.8285983651139857" + ], + [ + 1739785959, + "2.8285662159537983" + ], + [ + 1739786259, + "2.8288122549132066" + ], + [ + 1739786559, + "2.8290214643176306" + ], + [ + 1739786859, + "2.8291358078182376" + ], + [ + 1739787159, + "2.8290825703515603" + ], + [ + 1739787459, + "2.8290425153097876" + ], 
+ [ + 1739787759, + "2.8290476434794325" + ], + [ + 1739788059, + "2.8291700131104025" + ], + [ + 1739788359, + "2.8292411950691903" + ], + [ + 1739788659, + "2.8292403303221807" + ], + [ + 1739788959, + "2.8291497713546416" + ], + [ + 1739789259, + "2.829131832617153" + ], + [ + 1739789559, + "2.829138672372275" + ], + [ + 1739789859, + "2.8291698940982037" + ], + [ + 1739790159, + "2.8291522689544943" + ], + [ + 1739790459, + "2.8290932602137135" + ], + [ + 1739790759, + "2.829067023508567" + ], + [ + 1739791059, + "2.829232057446435" + ], + [ + 1739791359, + "2.8293521537521373" + ], + [ + 1739791659, + "2.8293414115216846" + ], + [ + 1739791959, + "2.8293539812287145" + ] + ] + }, + { + "metric": { + "container": "login-app", + "endpoint": "http", + "handler": "/metrics", + "instance": "10.244.0.30:8000", + "job": "login-app-service", + "method": "GET", + "namespace": "default", + "pod": "login-app-58995d8584-pbv8p", + "service": "login-app-service" + }, + "values": [ + [ + 1739773059, + "0.00846668857143158" + ], + [ + 1739773359, + "0.008678134018348665" + ], + [ + 1739773659, + "0.008588971893490137" + ], + [ + 1739773959, + "0.008546644860262416" + ], + [ + 1739774259, + "0.00877100479930647" + ], + [ + 1739774559, + "0.008492624879654503" + ], + [ + 1739774859, + "0.008520295870417598" + ], + [ + 1739775159, + "0.0085960204584219" + ], + [ + 1739775459, + "0.008748462376179489" + ], + [ + 1739775759, + "0.008707839146004707" + ], + [ + 1739776059, + "0.008802245097065673" + ], + [ + 1739776359, + "0.00884219228630757" + ], + [ + 1739776659, + "0.00880753871780504" + ], + [ + 1739776959, + "0.008836821677914912" + ], + [ + 1739777259, + "0.008843312683905301" + ], + [ + 1739777559, + "0.008805021601678644" + ], + [ + 1739777859, + "0.008850501024768212" + ], + [ + 1739778159, + "0.008829969988763765" + ], + [ + 1739778459, + "0.00887498152169415" + ], + [ + 1739778759, + "0.008862755513872543" + ], + [ + 1739779059, + "0.0088982690792612" + ], + [ + 
1739779359, + "0.008881663406415463" + ], + [ + 1739779659, + "0.008891747513508508" + ], + [ + 1739779959, + "0.008922389587117533" + ], + [ + 1739780259, + "0.008962377049021274" + ], + [ + 1739780559, + "0.008975942372491854" + ], + [ + 1739780859, + "0.008994294058416386" + ], + [ + 1739781159, + "0.008982070990407206" + ], + [ + 1739781459, + "0.0089648725812625" + ], + [ + 1739781759, + "0.008960532272217001" + ], + [ + 1739782059, + "0.008998948832883553" + ], + [ + 1739782359, + "0.009003110165003011" + ], + [ + 1739782659, + "0.009032165633308479" + ], + [ + 1739782959, + "0.009047435979789789" + ], + [ + 1739783259, + "0.009012811550979962" + ], + [ + 1739783559, + "0.00903455379664957" + ], + [ + 1739783859, + "0.008989507687646312" + ], + [ + 1739784159, + "0.009035354254298638" + ], + [ + 1739784459, + "0.009003218899959945" + ], + [ + 1739784759, + "0.009004752629975651" + ], + [ + 1739785059, + "0.009022955156801054" + ], + [ + 1739785359, + "0.00902572053288378" + ], + [ + 1739785659, + "0.00904632557026731" + ], + [ + 1739785959, + "0.009043385511223401" + ], + [ + 1739786259, + "0.009062357688732045" + ], + [ + 1739786559, + "0.009095935643143118" + ], + [ + 1739786859, + "0.009121873600573585" + ], + [ + 1739787159, + "0.009126550629839516" + ], + [ + 1739787459, + "0.009133710421648376" + ], + [ + 1739787759, + "0.009145951813987443" + ], + [ + 1739788059, + "0.009141624673337777" + ], + [ + 1739788359, + "0.009141860745902083" + ], + [ + 1739788659, + "0.009140670097509828" + ], + [ + 1739788959, + "0.009130774197275822" + ], + [ + 1739789259, + "0.009111968951355867" + ], + [ + 1739789559, + "0.009121130800240896" + ], + [ + 1739789859, + "0.009117942878851645" + ], + [ + 1739790159, + "0.009126771455755866" + ], + [ + 1739790459, + "0.009153510467274579" + ], + [ + 1739790759, + "0.009165751174705604" + ], + [ + 1739791059, + "0.009172422976989395" + ], + [ + 1739791359, + "0.009165314803188105" + ], + [ + 1739791659, + "0.009162682029190416" 
+ ], + [ + 1739791959, + "0.009169567396190837" + ] + ] + } + ] + }, + "random_key": "0we9", + "tool_name": "execute_prometheus_range_query", + "description": "Average HTTP request latency for customer-orders-service", + "query": "http_request_duration_seconds_sum / http_request_duration_seconds_count", + "start": "1739705559", + "end": "1739791959", + "step": 300 +} diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query_2.txt b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query_2.txt new file mode 100644 index 00000000..080cb1d3 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query_2.txt @@ -0,0 +1,2598 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "data": { + "resultType": "matrix", + "result": [ + { + "metric": { + "container": "fastapi-app", + "endpoint": "http", + "handler": "/", + "instance": "10.244.0.19:8000", + "job": "customer-orders-service", + "method": "GET", + "namespace": "default", + "pod": "customer-orders-6f5cbdf85-c5fsf", + "service": "customer-orders-service" + }, + "values": [ + [ + 1739772879, + "6.006911542000012" + ], + [ + 1739772939, + "6.006911542000012" + ], + [ + 1739772999, + "7.007923973500012" + ], + [ + 1739773059, + "7.007747082000009" + ], + [ + 1739773119, + "7.758122644250001" + ], + [ + 1739773179, + "7.757933481999991" + ], + [ + 1739773239, + "8.258781271749982" + ], + [ + 1739773299, + "8.758789859749996" + ], + [ + 1739773359, + "9.009249614199996" + ], + [ + 1739773419, + "8.759249685" + ], + [ + 1739773479, + "9.009729588249996" + ], + [ + 1739773539, + "8.759213496500024" + ], + 
[ + 1739773599, + "7.758259760250013" + ], + [ + 1739773659, + "7.257815685750017" + ], + [ + 1739773719, + "7.508067395250009" + ], + [ + 1739773779, + "7.507443340749944" + ], + [ + 1739773839, + "8.258002217249949" + ], + [ + 1739773899, + "8.208212137399956" + ], + [ + 1739773959, + "8.257937088249943" + ], + [ + 1739774019, + "8.757508732499954" + ], + [ + 1739774079, + "8.256989014999984" + ], + [ + 1739774139, + "8.757990374249971" + ], + [ + 1739774199, + "9.007368194499973" + ], + [ + 1739774259, + "7.756996364250028" + ], + [ + 1739774319, + "7.757246777000034" + ], + [ + 1739774379, + "7.206252581800026" + ], + [ + 1739774439, + "6.2054873940000554" + ], + [ + 1739774499, + "5.505218467750069" + ], + [ + 1739774559, + "6.506225559500024" + ], + [ + 1739774619, + "6.505919401750021" + ], + [ + 1739774679, + "6.506882022250011" + ], + [ + 1739774739, + "6.756950814499987" + ], + [ + 1739774799, + "5.755944083249972" + ], + [ + 1739774859, + "6.0061740067500295" + ], + [ + 1739774919, + "7.257413727750077" + ], + [ + 1739774979, + "6.807173017200056" + ], + [ + 1739775039, + "7.007574977400054" + ], + [ + 1739775099, + "7.507998211250083" + ], + [ + 1739775159, + "7.758073791000015" + ], + [ + 1739775219, + "7.757631947749929" + ], + [ + 1739775279, + "8.00785348324996" + ], + [ + 1739775339, + "8.75813938500005" + ], + [ + 1739775399, + "8.75895610100008" + ], + [ + 1739775459, + "8.509002138000142" + ], + [ + 1739775519, + "8.208447821000117" + ], + [ + 1739775579, + "8.758785697000121" + ], + [ + 1739775639, + "7.75821505099998" + ], + [ + 1739775699, + "7.007337355749996" + ], + [ + 1739775759, + "7.25704141150004" + ], + [ + 1739775819, + "8.006123858000024" + ], + [ + 1739775879, + "8.005737399250052" + ], + [ + 1739775939, + "9.006552918750003" + ], + [ + 1739775999, + "8.205668952799988" + ], + [ + 1739776059, + "7.40516243919992" + ], + [ + 1739776119, + "6.755812964249912" + ], + [ + 1739776179, + "7.256094892499959" + ], + [ + 1739776239, + 
"6.254962020999982" + ], + [ + 1739776299, + "7.506989841250061" + ], + [ + 1739776359, + "7.257099240999992" + ], + [ + 1739776419, + "7.257704500999922" + ], + [ + 1739776479, + "8.008136494249925" + ], + [ + 1739776539, + "8.207737753999936" + ], + [ + 1739776599, + "7.406046824799886" + ], + [ + 1739776659, + "8.006002731999956" + ], + [ + 1739776719, + "7.755197058000022" + ], + [ + 1739776779, + "7.004803537000043" + ], + [ + 1739776839, + "6.505294160750168" + ], + [ + 1739776899, + "7.255810632500243" + ], + [ + 1739776959, + "7.506593340000222" + ], + [ + 1739777019, + "7.2567012235001584" + ], + [ + 1739777079, + "7.807419219000075" + ], + [ + 1739777139, + "7.806900095199854" + ], + [ + 1739777199, + "7.007337890399866" + ], + [ + 1739777259, + "6.756901174249833" + ], + [ + 1739777319, + "7.007034794499987" + ], + [ + 1739777379, + "5.755951220500038" + ], + [ + 1739777439, + "5.506015439000066" + ], + [ + 1739777499, + "6.507039589749866" + ], + [ + 1739777559, + "6.50689723499977" + ], + [ + 1739777619, + "6.757224703249676" + ], + [ + 1739777679, + "6.807423797599767" + ], + [ + 1739777739, + "7.408275515799868" + ], + [ + 1739777799, + "7.007809097249947" + ], + [ + 1739777859, + "7.50864402975003" + ], + [ + 1739777919, + "8.509629683750063" + ], + [ + 1739777979, + "8.509553631500012" + ], + [ + 1739778039, + "8.008584409499917" + ], + [ + 1739778099, + "8.258727426499945" + ], + [ + 1739778159, + "8.008535119000044" + ], + [ + 1739778219, + "8.408937905199855" + ], + [ + 1739778279, + "8.759193389999837" + ], + [ + 1739778339, + "9.509662840999908" + ], + [ + 1739778399, + "9.259424023999827" + ], + [ + 1739778459, + "9.25870603574981" + ], + [ + 1739778519, + "9.258579091250112" + ], + [ + 1739778579, + "8.00799186925019" + ], + [ + 1739778639, + "8.008101848250135" + ], + [ + 1739778699, + "7.607169210600114" + ], + [ + 1739778759, + "7.406147014999988" + ], + [ + 1739778819, + "6.755173450749909" + ], + [ + 1739778879, + "8.006494438499885" + 
], + [ + 1739778939, + "7.25498146699988" + ], + [ + 1739778999, + "7.005592500750026" + ], + [ + 1739779059, + "6.256502665000198" + ], + [ + 1739779119, + "5.255490649499961" + ], + [ + 1739779179, + "6.257142547749936" + ], + [ + 1739779239, + "6.0062971023999125" + ], + [ + 1739779299, + "6.205825084799835" + ], + [ + 1739779359, + "7.206820759999936" + ], + [ + 1739779419, + "7.506797687250128" + ], + [ + 1739779479, + "7.757157862500208" + ], + [ + 1739779539, + "8.008148490500162" + ], + [ + 1739779599, + "8.758846582750039" + ], + [ + 1739779659, + "7.507621480750004" + ], + [ + 1739779719, + "6.756240088249796" + ], + [ + 1739779779, + "7.4070246615998245" + ], + [ + 1739779839, + "8.207858220999878" + ], + [ + 1739779899, + "8.008244007249914" + ], + [ + 1739779959, + "9.009214101499765" + ], + [ + 1739780019, + "9.009816711249869" + ], + [ + 1739780079, + "8.258543936749902" + ], + [ + 1739780139, + "7.50785377599982" + ], + [ + 1739780199, + "6.506054522999876" + ], + [ + 1739780259, + "6.755486902249913" + ], + [ + 1739780319, + "7.406415916200057" + ], + [ + 1739780379, + "7.807376683200163" + ], + [ + 1739780439, + "8.007149607250312" + ], + [ + 1739780499, + "8.257913026250435" + ], + [ + 1739780559, + "8.759074039000325" + ], + [ + 1739780619, + "8.759044418250369" + ], + [ + 1739780679, + "8.50753272650013" + ], + [ + 1739780739, + "8.256971091999958" + ], + [ + 1739780799, + "7.255919399500044" + ], + [ + 1739780859, + "6.805916578000142" + ], + [ + 1739780919, + "6.005136625199884" + ], + [ + 1739780979, + "5.505463550249942" + ], + [ + 1739781039, + "6.25496485650001" + ], + [ + 1739781099, + "6.004087576500297" + ], + [ + 1739781159, + "6.504028474750157" + ], + [ + 1739781219, + "6.50411246475005" + ], + [ + 1739781279, + "7.006252243750168" + ], + [ + 1739781339, + "8.25810831050012" + ], + [ + 1739781399, + "8.508200821750052" + ], + [ + 1739781459, + "8.408000972399895" + ], + [ + 1739781519, + "9.008388794250095" + ], + [ + 1739781579, + 
"7.756952079000257" + ], + [ + 1739781639, + "7.2567249945000185" + ], + [ + 1739781699, + "7.506822841750362" + ], + [ + 1739781759, + "7.506679440750759" + ], + [ + 1739781819, + "8.507886307500485" + ], + [ + 1739781879, + "7.756432367500565" + ], + [ + 1739781939, + "7.606200286200669" + ], + [ + 1739781999, + "7.406152514800465" + ], + [ + 1739782059, + "7.256033650750368" + ], + [ + 1739782119, + "6.755099485000727" + ], + [ + 1739782179, + "6.755763347500732" + ], + [ + 1739782239, + "6.756299932000275" + ], + [ + 1739782299, + "7.2569120345001465" + ], + [ + 1739782359, + "8.008266116499726" + ], + [ + 1739782419, + "9.00908641074966" + ], + [ + 1739782479, + "8.507753000999855" + ], + [ + 1739782539, + "8.60804555839968" + ], + [ + 1739782599, + "8.257896979999714" + ], + [ + 1739782659, + "7.506653164999534" + ], + [ + 1739782719, + "6.505579416999353" + ], + [ + 1739782779, + "6.50559204999945" + ], + [ + 1739782839, + "6.5056406374997096" + ], + [ + 1739782899, + "7.0066210424997735" + ], + [ + 1739782959, + "7.2570096924996506" + ], + [ + 1739783019, + "6.806151251599658" + ], + [ + 1739783079, + "7.4069048493998695" + ], + [ + 1739783139, + "7.006278527249833" + ], + [ + 1739783199, + "7.0063131767501545" + ], + [ + 1739783259, + "6.756238205750378" + ], + [ + 1739783319, + "8.007956536500387" + ], + [ + 1739783379, + "7.507973012250204" + ], + [ + 1739783439, + "6.757033744750061" + ], + [ + 1739783499, + "7.006845554750271" + ], + [ + 1739783559, + "6.606582078599967" + ], + [ + 1739783619, + "5.805888140999741" + ], + [ + 1739783679, + "5.6053118285995875" + ], + [ + 1739783739, + "6.005154508799387" + ], + [ + 1739783799, + "6.0050672702491275" + ], + [ + 1739783859, + "6.255105172499498" + ], + [ + 1739783919, + "6.254896438999822" + ], + [ + 1739783979, + "6.2547545089996675" + ], + [ + 1739784039, + "6.5050586742499945" + ], + [ + 1739784099, + "6.504393726249873" + ], + [ + 1739784159, + "6.254548460000023" + ], + [ + 1739784219, + 
"6.8054298293998725" + ], + [ + 1739784279, + "7.2066028612000075" + ], + [ + 1739784339, + "6.756566345749889" + ], + [ + 1739784399, + "7.758117361249788" + ], + [ + 1739784459, + "9.009071176749787" + ], + [ + 1739784519, + "9.008893714999886" + ], + [ + 1739784579, + "9.509336027500012" + ], + [ + 1739784639, + "9.50848044175018" + ], + [ + 1739784699, + "8.507456790250217" + ], + [ + 1739784759, + "8.607977935000235" + ], + [ + 1739784819, + "8.507911053750377" + ], + [ + 1739784879, + "7.757198337000318" + ], + [ + 1739784939, + "6.507019269750344" + ], + [ + 1739784999, + "7.0070338497503135" + ], + [ + 1739785059, + "6.506306887000392" + ], + [ + 1739785119, + "6.506179266500567" + ], + [ + 1739785179, + "7.006924671500656" + ], + [ + 1739785239, + "7.508009333750579" + ], + [ + 1739785299, + "7.007598072800465" + ], + [ + 1739785359, + "6.607329328200285" + ], + [ + 1739785419, + "6.40648359600018" + ], + [ + 1739785479, + "6.2560877297501065" + ], + [ + 1739785539, + "5.004776942249919" + ], + [ + 1739785599, + "5.754398218749884" + ], + [ + 1739785659, + "5.753768947250137" + ], + [ + 1739785719, + "6.504681838000124" + ], + [ + 1739785779, + "7.505842333000146" + ], + [ + 1739785839, + "7.75704187600013" + ], + [ + 1739785899, + "7.807142460999966" + ], + [ + 1739785959, + "8.608516343799783" + ], + [ + 1739786019, + "8.759091488749618" + ], + [ + 1739786079, + "8.257360965999851" + ], + [ + 1739786139, + "8.257390547249543" + ], + [ + 1739786199, + "8.758021212999665" + ], + [ + 1739786259, + "8.50700592999965" + ], + [ + 1739786319, + "8.007658972999252" + ], + [ + 1739786379, + "8.007929957999659" + ], + [ + 1739786439, + "7.807287177800025" + ], + [ + 1739786499, + "7.256592106250082" + ], + [ + 1739786559, + "7.757845622000332" + ], + [ + 1739786619, + "7.757812725500571" + ], + [ + 1739786679, + "7.255932676250268" + ], + [ + 1739786739, + "6.505975235000278" + ], + [ + 1739786799, + "6.254746230999899" + ], + [ + 1739786859, + "6.505287912749736" 
+ ], + [ + 1739786919, + "6.605684546399789" + ], + [ + 1739786979, + "7.4076184249999635" + ], + [ + 1739787039, + "7.607331468999837" + ], + [ + 1739787099, + "7.257614290499987" + ], + [ + 1739787159, + "8.008109908500046" + ], + [ + 1739787219, + "7.507706260250415" + ], + [ + 1739787279, + "6.506457530999796" + ], + [ + 1739787339, + "6.256748055249773" + ], + [ + 1739787399, + "5.255931343499925" + ], + [ + 1739787459, + "5.755976384749374" + ], + [ + 1739787519, + "6.4056667805994945" + ], + [ + 1739787579, + "6.205686133399649" + ], + [ + 1739787639, + "7.206648664799649" + ], + [ + 1739787699, + "7.756845224499557" + ], + [ + 1739787759, + "8.507863866999742" + ], + [ + 1739787819, + "8.008155790499586" + ], + [ + 1739787879, + "9.008084149249498" + ], + [ + 1739787939, + "8.50700700724974" + ], + [ + 1739787999, + "8.006739632249719" + ], + [ + 1739788059, + "8.206862436199662" + ], + [ + 1739788119, + "8.50717903074974" + ], + [ + 1739788179, + "8.506177447499795" + ], + [ + 1739788239, + "8.757055432499783" + ], + [ + 1739788299, + "8.00633324024966" + ], + [ + 1739788359, + "7.505597507999482" + ], + [ + 1739788419, + "6.756879635249788" + ], + [ + 1739788479, + "5.755885748249511" + ], + [ + 1739788539, + "6.256382487249539" + ], + [ + 1739788599, + "6.606263282999498" + ], + [ + 1739788659, + "6.606509446799827" + ], + [ + 1739788719, + "6.756361670499699" + ], + [ + 1739788779, + "7.256847274499705" + ], + [ + 1739788839, + "8.006612696499815" + ], + [ + 1739788899, + "8.507642182500149" + ], + [ + 1739788959, + "8.757758371500131" + ], + [ + 1739789019, + "8.257223546750083" + ], + [ + 1739789079, + "7.757701622000241" + ], + [ + 1739789139, + "7.6077026810002275" + ], + [ + 1739789199, + "7.006821810800101" + ], + [ + 1739789259, + "6.757000689000051" + ], + [ + 1739789319, + "6.756931215750228" + ], + [ + 1739789379, + "6.756295348000094" + ], + [ + 1739789439, + "7.256105054250838" + ], + [ + 1739789499, + "7.50702966300105" + ], + [ + 
1739789559, + "8.508131645501635" + ], + [ + 1739789619, + "8.007829722001588" + ], + [ + 1739789679, + "8.208088049801153" + ], + [ + 1739789739, + "7.407878529001027" + ], + [ + 1739789799, + "7.257548537751062" + ], + [ + 1739789859, + "7.507809745000486" + ], + [ + 1739789919, + "8.259023528999933" + ], + [ + 1739789979, + "8.25906597499943" + ], + [ + 1739790039, + "9.510319727248316" + ], + [ + 1739790099, + "8.759468004248447" + ], + [ + 1739790159, + "7.757524089499383" + ], + [ + 1739790219, + "7.807681879599112" + ], + [ + 1739790279, + "7.207106722399475" + ], + [ + 1739790339, + "6.506155116249829" + ], + [ + 1739790399, + "7.256610873499994" + ], + [ + 1739790459, + "7.757528367249506" + ], + [ + 1739790519, + "7.006904954750098" + ], + [ + 1739790579, + "6.756148491000204" + ], + [ + 1739790639, + "6.756098465749346" + ], + [ + 1739790699, + "7.406681810799636" + ], + [ + 1739790759, + "7.0066033943992805" + ], + [ + 1739790819, + "7.406769480399088" + ], + [ + 1739790879, + "8.00761447449895" + ], + [ + 1739790939, + "7.507396508249258" + ], + [ + 1739790999, + "7.006638751249739" + ], + [ + 1739791059, + "7.506741080750544" + ], + [ + 1739791119, + "7.756516784001178" + ], + [ + 1739791179, + "7.255868693751836" + ], + [ + 1739791239, + "7.756859645501208" + ], + [ + 1739791299, + "7.606640099001378" + ], + [ + 1739791359, + "7.756817768251494" + ], + [ + 1739791419, + "7.757055970751026" + ], + [ + 1739791479, + "7.756929871250577" + ], + [ + 1739791539, + "6.755962184750387" + ], + [ + 1739791599, + "7.256807451999521" + ], + [ + 1739791659, + "7.506789670249418" + ], + [ + 1739791719, + "8.507846381248783" + ], + [ + 1739791779, + "9.007917742498648" + ], + [ + 1739791839, + "8.206929353998566" + ], + [ + 1739791899, + "7.40618286619865" + ], + [ + 1739791959, + "7.006070003748391" + ] + ] + }, + { + "metric": { + "container": "fastapi-app", + "endpoint": "http", + "handler": "/metrics", + "instance": "10.244.0.19:8000", + "job": 
"customer-orders-service", + "method": "GET", + "namespace": "default", + "pod": "customer-orders-6f5cbdf85-c5fsf", + "service": "customer-orders-service" + }, + "values": [ + [ + 1739772879, + "0.0012452383865412821" + ], + [ + 1739772939, + "0.0011021824072294769" + ], + [ + 1739772999, + "0.0011294081469067538" + ], + [ + 1739773059, + "0.001135836655142644" + ], + [ + 1739773119, + "0.0011470835526306712" + ], + [ + 1739773179, + "0.00115186292105525" + ], + [ + 1739773239, + "0.0011534796842116329" + ], + [ + 1739773299, + "0.0011486553157887532" + ], + [ + 1739773359, + "0.0012556393421005538" + ], + [ + 1739773419, + "0.001283323973672467" + ], + [ + 1739773479, + "0.0012944506578827185" + ], + [ + 1739773539, + "0.0012690807105137105" + ], + [ + 1739773599, + "0.0012868301052435525" + ], + [ + 1739773659, + "0.0012122121842020007" + ], + [ + 1739773719, + "0.001191606684199685" + ], + [ + 1739773779, + "0.0012086908157824068" + ], + [ + 1739773839, + "0.001214202447368064" + ], + [ + 1739773899, + "0.0011817770263178316" + ], + [ + 1739773959, + "0.0011689156315780659" + ], + [ + 1739774019, + "0.0011344907631526343" + ], + [ + 1739774079, + "0.0011330818420889812" + ], + [ + 1739774139, + "0.0011494284736595784" + ], + [ + 1739774199, + "0.0011701677631328108" + ], + [ + 1739774259, + "0.0011785201841899163" + ], + [ + 1739774319, + "0.0012201845263103007" + ], + [ + 1739774379, + "0.0012404352105357629" + ], + [ + 1739774439, + "0.0012217556052710756" + ], + [ + 1739774499, + "0.0012066817368553242" + ], + [ + 1739774559, + "0.0011923439210656732" + ], + [ + 1739774619, + "0.0011694210263254047" + ], + [ + 1739774679, + "0.0011751934736917303" + ], + [ + 1739774739, + "0.0011791902105343117" + ], + [ + 1739774799, + "0.001175995447367409" + ], + [ + 1739774859, + "0.001187559289471545" + ], + [ + 1739774919, + "0.0011843552104940163" + ], + [ + 1739774979, + "0.0011656749210380056" + ], + [ + 1739775039, + "0.0011786240789110466" + ], + [ + 1739775099, + 
"0.0012222277105018674" + ], + [ + 1739775159, + "0.0012404070789312084" + ], + [ + 1739775219, + "0.0012831252368438358" + ], + [ + 1739775279, + "0.0012967554736836684" + ], + [ + 1739775339, + "0.001318222631605175" + ], + [ + 1739775399, + "0.0013250136578933864" + ], + [ + 1739775459, + "0.0012612806053048666" + ], + [ + 1739775519, + "0.0012317406842492727" + ], + [ + 1739775579, + "0.0012117342105404149" + ], + [ + 1739775639, + "0.0011665133684250336" + ], + [ + 1739775699, + "0.0011342130263098" + ], + [ + 1739775759, + "0.0011617333420992125" + ], + [ + 1739775819, + "0.001180116421042648" + ], + [ + 1739775879, + "0.0011980422631567259" + ], + [ + 1739775939, + "0.0011673096315749525" + ], + [ + 1739775999, + "0.0012106526315787242" + ], + [ + 1739776059, + "0.001207485263158129" + ], + [ + 1739776119, + "0.0012190188947083936" + ], + [ + 1739776179, + "0.0012268279736441685" + ], + [ + 1739776239, + "0.0011919777368084874" + ], + [ + 1739776299, + "0.0011854715525930115" + ], + [ + 1739776359, + "0.0012032467894689607" + ], + [ + 1739776419, + "0.00119225228949242" + ], + [ + 1739776479, + "0.0012415669737038115" + ], + [ + 1739776539, + "0.00127350868423922" + ], + [ + 1739776599, + "0.0013054765263404988" + ], + [ + 1739776659, + "0.001290266236848861" + ], + [ + 1739776719, + "0.001291975842138383" + ], + [ + 1739776779, + "0.0012385826842450784" + ], + [ + 1739776839, + "0.0012258659473780807" + ], + [ + 1739776899, + "0.0011838809210339126" + ], + [ + 1739776959, + "0.0011804123947082475" + ], + [ + 1739777019, + "0.0011929527368130563" + ], + [ + 1739777079, + "0.0012255948946451672" + ], + [ + 1739777139, + "0.0012238133946610966" + ], + [ + 1739777199, + "0.001216830499958489" + ], + [ + 1739777259, + "0.0011892045262787798" + ], + [ + 1739777319, + "0.0011627112104705653" + ], + [ + 1739777379, + "0.0011304774210370132" + ], + [ + 1739777439, + "0.0011219362894687372" + ], + [ + 1739777499, + "0.0011565576578265646" + ], + [ + 1739777559, + 
"0.0011767267631177288" + ], + [ + 1739777619, + "0.001203721473613864" + ], + [ + 1739777679, + "0.0012196116315288618" + ], + [ + 1739777739, + "0.001185833736790779" + ], + [ + 1739777799, + "0.0011569347631465413" + ], + [ + 1739777859, + "0.0011834574736559788" + ], + [ + 1739777919, + "0.001169089447366308" + ], + [ + 1739777979, + "0.001181287763134414" + ], + [ + 1739778039, + "0.0011950855789205218" + ], + [ + 1739778099, + "0.0011956171315911264" + ], + [ + 1739778159, + "0.001189542605324708" + ], + [ + 1739778219, + "0.0012056763421720747" + ], + [ + 1739778279, + "0.0012039200263835052" + ], + [ + 1739778339, + "0.0011949763158650296" + ], + [ + 1739778399, + "0.0011846133158398612" + ], + [ + 1739778459, + "0.0012063784210544037" + ], + [ + 1739778519, + "0.0012242177368343137" + ], + [ + 1739778579, + "0.0012164939473797475" + ], + [ + 1739778639, + "0.001226517500000275" + ], + [ + 1739778699, + "0.0012087204473860574" + ], + [ + 1739778759, + "0.0011678999999994151" + ], + [ + 1739778819, + "0.0011687038947074094" + ], + [ + 1739778879, + "0.0011970142631023343" + ], + [ + 1739778939, + "0.0011737270788934869" + ], + [ + 1739778999, + "0.001189326684174088" + ], + [ + 1739779059, + "0.001190344500005985" + ], + [ + 1739779119, + "0.0011516685526418624" + ], + [ + 1739779179, + "0.0011230707105871834" + ], + [ + 1739779239, + "0.0011881868421800103" + ], + [ + 1739779299, + "0.0011840153421728342" + ], + [ + 1739779359, + "0.0011781441053244632" + ], + [ + 1739779419, + "0.001188619026379456" + ], + [ + 1739779479, + "0.0011691458684684843" + ], + [ + 1739779539, + "0.0011576887631427348" + ], + [ + 1739779599, + "0.001153810184237763" + ], + [ + 1739779659, + "0.0011647171052955902" + ], + [ + 1739779719, + "0.0011839855263276963" + ], + [ + 1739779779, + "0.001153958736889954" + ], + [ + 1739779839, + "0.001163187815875158" + ], + [ + 1739779899, + "0.0011844375000548194" + ], + [ + 1739779959, + "0.001188830000046437" + ], + [ + 1739780019, + 
"0.0012252005000223823" + ], + [ + 1739780079, + "0.0012455845263072036" + ], + [ + 1739780139, + "0.0012380379473478041" + ], + [ + 1739780199, + "0.0012396869210054941" + ], + [ + 1739780259, + "0.001196281552582054" + ], + [ + 1739780319, + "0.0011970753683654995" + ], + [ + 1739780379, + "0.0011913292631489686" + ], + [ + 1739780439, + "0.0012147320000239184" + ], + [ + 1739780499, + "0.0012132596315687374" + ], + [ + 1739780559, + "0.0012121927368417872" + ], + [ + 1739780619, + "0.001186878552624349" + ], + [ + 1739780679, + "0.001200178842066778" + ], + [ + 1739780739, + "0.0011874605789240664" + ], + [ + 1739780799, + "0.001193677157899449" + ], + [ + 1739780859, + "0.0012063521841905879" + ], + [ + 1739780919, + "0.0012312662106045247" + ], + [ + 1739780979, + "0.0012181446315097624" + ], + [ + 1739781039, + "0.0012645707104303782" + ], + [ + 1739781099, + "0.0012476974998207877" + ], + [ + 1739781159, + "0.0012539288681613183" + ], + [ + 1739781219, + "0.0012483303418392958" + ], + [ + 1739781279, + "0.001235638394638571" + ], + [ + 1739781339, + "0.001250489815615047" + ], + [ + 1739781399, + "0.0012529362894910215" + ], + [ + 1739781459, + "0.0012530619474809528" + ], + [ + 1739781519, + "0.0012655263948736797" + ], + [ + 1739781579, + "0.0012705405264760452" + ], + [ + 1739781639, + "0.0012397417896959507" + ], + [ + 1739781699, + "0.0012277347370017147" + ], + [ + 1739781759, + "0.0012296958159487602" + ], + [ + 1739781819, + "0.0012108243686031383" + ], + [ + 1739781879, + "0.0012054235001797726" + ], + [ + 1739781939, + "0.001219491000244345" + ], + [ + 1739781999, + "0.0012194646317431378" + ], + [ + 1739782059, + "0.0012307463159327437" + ], + [ + 1739782119, + "0.0012429551843045996" + ], + [ + 1739782179, + "0.0012631387631987117" + ], + [ + 1739782239, + "0.001213865263085417" + ], + [ + 1739782299, + "0.001223264657758256" + ], + [ + 1739782359, + "0.0012216671578387734" + ], + [ + 1739782419, + "0.001198508289522387" + ], + [ + 1739782479, + 
"0.0012054404736964465" + ], + [ + 1739782539, + "0.0012257149735791916" + ], + [ + 1739782599, + "0.0012441268156276476" + ], + [ + 1739782659, + "0.0012549833156395164" + ], + [ + 1739782719, + "0.0012481877892672568" + ], + [ + 1739782779, + "0.0012525283945203562" + ], + [ + 1739782839, + "0.0012412602893738564" + ], + [ + 1739782899, + "0.0012222647893921882" + ], + [ + 1739782959, + "0.0012127628421551074" + ], + [ + 1739783019, + "0.001189594605373139" + ], + [ + 1739783079, + "0.001171396500180172" + ], + [ + 1739783139, + "0.0011822708687472378" + ], + [ + 1739783199, + "0.0011516235528305466" + ], + [ + 1739783259, + "0.001123888026373415" + ], + [ + 1739783319, + "0.0011338010263946335" + ], + [ + 1739783379, + "0.0011175123157188768" + ], + [ + 1739783439, + "0.001101427947201285" + ], + [ + 1739783499, + "0.0011359339209536157" + ], + [ + 1739783559, + "0.0011929987630011102" + ], + [ + 1739783619, + "0.0011781149472115335" + ], + [ + 1739783679, + "0.0011959323946460867" + ], + [ + 1739783739, + "0.001272350315766265" + ], + [ + 1739783799, + "0.0013101245525544262" + ], + [ + 1739783859, + "0.001303722921061547" + ], + [ + 1739783919, + "0.0013194413947865923" + ], + [ + 1739783979, + "0.0013447907631416456" + ], + [ + 1739784039, + "0.0012852255262930012" + ], + [ + 1739784099, + "0.0012328958946454804" + ], + [ + 1739784159, + "0.0012187291578907445" + ], + [ + 1739784219, + "0.0012177092105144093" + ], + [ + 1739784279, + "0.0012003229737130917" + ], + [ + 1739784339, + "0.0011935900263683593" + ], + [ + 1739784399, + "0.001217946236800727" + ], + [ + 1739784459, + "0.0012278380789116216" + ], + [ + 1739784519, + "0.0012615707631228265" + ], + [ + 1739784579, + "0.0012357461314662175" + ], + [ + 1739784639, + "0.001244307973617029" + ], + [ + 1739784699, + "0.0012131706052969093" + ], + [ + 1739784759, + "0.0012251488684629102" + ], + [ + 1739784819, + "0.0012695369474673306" + ], + [ + 1739784879, + "0.0012991347634133967" + ], + [ + 1739784939, 
+ "0.0012977402107265477" + ], + [ + 1739784999, + "0.0013180940263976624" + ], + [ + 1739785059, + "0.0012734894737245906" + ], + [ + 1739785119, + "0.001295302315779018" + ], + [ + 1739785179, + "0.0013156976578293802" + ], + [ + 1739785239, + "0.0013025044736423297" + ], + [ + 1739785299, + "0.0012712879473072366" + ], + [ + 1739785359, + "0.0012884562103959308" + ], + [ + 1739785419, + "0.001265879157759436" + ], + [ + 1739785479, + "0.0012634348419229163" + ], + [ + 1739785539, + "0.0012671580263103302" + ], + [ + 1739785599, + "0.0012402374472194901" + ], + [ + 1739785659, + "0.0011917212104411814" + ], + [ + 1739785719, + "0.0011627890263346335" + ], + [ + 1739785779, + "0.00114936586841826" + ], + [ + 1739785839, + "0.0011621932894501552" + ], + [ + 1739785899, + "0.0011847695265092431" + ], + [ + 1739785959, + "0.0011747959475350827" + ], + [ + 1739786019, + "0.0012058744212864487" + ], + [ + 1739786079, + "0.0012175978686891058" + ], + [ + 1739786139, + "0.0012239262633125978" + ], + [ + 1739786199, + "0.0011931712368478703" + ], + [ + 1739786259, + "0.0012284960525664101" + ], + [ + 1739786319, + "0.001194044868354563" + ], + [ + 1739786379, + "0.0011821863683603862" + ], + [ + 1739786439, + "0.0011616816580403552" + ], + [ + 1739786499, + "0.001164486473786191" + ], + [ + 1739786559, + "0.0011737896579928162" + ], + [ + 1739786619, + "0.0011552616317299237" + ], + [ + 1739786679, + "0.0012214830000486566" + ], + [ + 1739786739, + "0.0012242889474931105" + ], + [ + 1739786799, + "0.0012133733158694474" + ], + [ + 1739786859, + "0.001233963789577262" + ], + [ + 1739786919, + "0.00123786657896792" + ], + [ + 1739786979, + "0.0012523807895137762" + ], + [ + 1739787039, + "0.0012765136579649772" + ], + [ + 1739787099, + "0.0012762117895411059" + ], + [ + 1739787159, + "0.0012665708947775113" + ], + [ + 1739787219, + "0.0012451316578287667" + ], + [ + 1739787279, + "0.0011991776051026566" + ], + [ + 1739787339, + "0.0011991311839665286" + ], + [ + 1739787399, 
+ "0.0011673921050092567" + ], + [ + 1739787459, + "0.0011733921050778773" + ], + [ + 1739787519, + "0.0011747593682325953" + ], + [ + 1739787579, + "0.0011590424209268683" + ], + [ + 1739787639, + "0.0011856463157336723" + ], + [ + 1739787699, + "0.0012204303420252183" + ], + [ + 1739787759, + "0.0012336861578468845" + ], + [ + 1739787819, + "0.0012438074736564884" + ], + [ + 1739787879, + "0.0012575933419851169" + ], + [ + 1739787939, + "0.0012748296841987449" + ], + [ + 1739787999, + "0.0012548244473548132" + ], + [ + 1739788059, + "0.001256918421045396" + ], + [ + 1739788119, + "0.0012095183158300257" + ], + [ + 1739788179, + "0.0012353286052817374" + ], + [ + 1739788239, + "0.0012623579737249344" + ], + [ + 1739788299, + "0.0012810851577868512" + ], + [ + 1739788359, + "0.0013011450525817435" + ], + [ + 1739788419, + "0.0012840925261403562" + ], + [ + 1739788479, + "0.0012804349998508482" + ], + [ + 1739788539, + "0.0012128312366353623" + ], + [ + 1739788599, + "0.0011838883683594759" + ], + [ + 1739788659, + "0.0011840583420053008" + ], + [ + 1739788719, + "0.0011833225526061142" + ], + [ + 1739788779, + "0.0011513764736120814" + ], + [ + 1739788839, + "0.0011467447368072737" + ], + [ + 1739788899, + "0.0011199430526080264" + ], + [ + 1739788959, + "0.0010943155790109334" + ], + [ + 1739789019, + "0.0010856926052694785" + ], + [ + 1739789079, + "0.0011163637107433474" + ], + [ + 1739789139, + "0.0011230523160089718" + ], + [ + 1739789199, + "0.0011303285527422924" + ], + [ + 1739789259, + "0.0011504116316102323" + ], + [ + 1739789319, + "0.0011616775262537932" + ], + [ + 1739789379, + "0.0011509905263282186" + ], + [ + 1739789439, + "0.001135927184134887" + ], + [ + 1739789499, + "0.0011405647104478895" + ], + [ + 1739789559, + "0.001154124184196055" + ], + [ + 1739789619, + "0.0011711976843032873" + ], + [ + 1739789679, + "0.0011812759476015342" + ], + [ + 1739789739, + "0.0012035877634365246" + ], + [ + 1739789799, + "0.001211816658101988" + ], + [ + 
1739789859, + "0.0012147785529171078" + ], + [ + 1739789919, + "0.0012267888684475204" + ], + [ + 1739789979, + "0.0012007500263280233" + ], + [ + 1739790039, + "0.001224611894557663" + ], + [ + 1739790099, + "0.0011709251315264017" + ], + [ + 1739790159, + "0.001179241500051974" + ], + [ + 1739790219, + "0.0011746471315602417" + ], + [ + 1739790279, + "0.0011857904475235906" + ], + [ + 1739790339, + "0.0012106941315945924" + ], + [ + 1739790399, + "0.0012362302629634013" + ], + [ + 1739790459, + "0.0012169124472022398" + ], + [ + 1739790519, + "0.0012604539209094484" + ], + [ + 1739790579, + "0.001251737526074162" + ], + [ + 1739790639, + "0.001240572710326217" + ], + [ + 1739790699, + "0.0012782500789877591" + ], + [ + 1739790759, + "0.0012499899472267744" + ], + [ + 1739790819, + "0.0012627959471214316" + ], + [ + 1739790879, + "0.0012542925261713159" + ], + [ + 1739790939, + "0.0012139848153502066" + ], + [ + 1739790999, + "0.0011975972098106898" + ], + [ + 1739791059, + "0.0012375059469425316" + ], + [ + 1739791119, + "0.0012196287627471304" + ], + [ + 1739791179, + "0.0012414592366487914" + ], + [ + 1739791239, + "0.0012427445787915002" + ], + [ + 1739791299, + "0.0012299963950920898" + ], + [ + 1739791359, + "0.001193801684406169" + ], + [ + 1739791419, + "0.0012000323949905578" + ], + [ + 1739791479, + "0.0012537508420408764" + ], + [ + 1739791539, + "0.0012653948946727293" + ], + [ + 1739791599, + "0.00131244555239774" + ], + [ + 1739791659, + "0.0013285718681512852" + ], + [ + 1739791719, + "0.0013191252363819665" + ], + [ + 1739791779, + "0.0012512513419910406" + ], + [ + 1739791839, + "0.0012393901579343947" + ], + [ + 1739791899, + "0.001182507684509466" + ], + [ + 1739791959, + "0.0011744442896375212" + ] + ] + } + ] + }, + "random_key": "9kLK", + "tool_name": "execute_prometheus_range_query", + "description": "Average HTTP request latency for customer-orders-service", + "query": 
"rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])", + "start": "1739705559", + "end": "1739791959", + "step": 60 +} diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/get_current_time.txt new file mode 100644 index 00000000..8db0e8cf --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/get_current_time.txt @@ -0,0 +1,2 @@ +{"toolset_name":"datetime","tool_name":"get_current_time","match_params":{}} +The current UTC date and time are 2025-02-17 11:32:39.132689+00:00. The current UTC timestamp in seconds is 1739791959. diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/Dockerfile b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/Dockerfile new file mode 100644 index 00000000..f932d018 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.10-slim + +# Set working directory +WORKDIR /app + +# Copy requirements.txt +COPY requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the FastAPI app +COPY . . 
+ +# Expose the ports +EXPOSE 8000 8001 + +# Run the FastAPI app +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/app.py b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/app.py new file mode 100644 index 00000000..4a88c70c --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/app.py @@ -0,0 +1,79 @@ +# ruff: noqa: F821 +import os +import logging +import time +from fastapi import FastAPI +from fastapi.responses import HTMLResponse +from prometheus_fastapi_instrumentator import Instrumentator +from random import randint +from time import sleep + +app = FastAPI() + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Database connection settings +DB_USERNAME = os.getenv("DB_USERNAME") +DB_PASSWORD = os.getenv("DB_PASSWORD") +DB_HOST = os.getenv("DB_HOST") +DB_DATABASE = os.getenv("DB_DATABASE") +DB_URL = f"mysql+pymysql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}/{DB_DATABASE}" +STORED_PROCEDURE = "sp_CheckUserNotifications" + +# Initialize database connection + +# Add Prometheus middleware +Instrumentator().instrument(app).expose(app) + + +def check_promotional_notifications(): + logger.info( + "Connecting to promotions database to see if we should try to upsell user" + ) + try: + logger.info(f"Connecting to database at {DB_HOST}") + start_time = time.time() + logger.info(f"Fetching data using stored procedure: {STORED_PROCEDURE}") + # Execute the stored procedure + # + sleep(randint(5, 10)) + + # Fetch the result + result = [(True, {"type": "notification", "discount": "$15"})] + end_time = time.time() + logger.info(f"Database call completed in {end_time - start_time:.2f} seconds.") + for row in result: + notifications = row[0] # Access the first element of the tuple + logger.info(f"Promotions result: {notifications}") + return notifications + except Exception as e: + 
logger.error(f"Error checking for promotions: {e}") + return False + + +@app.get("/", response_class=HTMLResponse) +def read_root(): + logger.info("Received request for checkout page.") + start_time = time.time() + has_promotions = check_promotional_notifications() + end_time = time.time() + logger.info(f"Page rendered in {end_time - start_time:.2f} seconds.") + return f""" + + + Checkout Status + + +

Success!

+

Promotions: {has_promotions}

+ + + """ + + +if __name__ == "__main__": + # Start Prometheus metrics server + start_http_server(8001) + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/build.sh b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/build.sh new file mode 100755 index 00000000..0d86d9a6 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/build.sh @@ -0,0 +1 @@ +docker buildx build --platform linux/amd64 . -t us-central1-docker.pkg.dev/genuine-flight-317411/devel/rds-demo:no-db-v1 diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/manifest.yaml b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/manifest.yaml new file mode 100644 index 00000000..fef0140f --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/manifest.yaml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: customer-orders +spec: + replicas: 1 + selector: + matchLabels: + app: customer-orders + template: + metadata: + labels: + app: customer-orders + spec: + containers: + - name: fastapi-app + image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/rds-demo:no-db-v1 + ports: + - containerPort: 8000 + - containerPort: 8001 + - name: curl-sidecar + image: curlimages/curl + args: + - /bin/sh + - -c + - while true; do curl -s http://localhost:8000; sleep 60; done +--- +apiVersion: v1 +kind: Service +metadata: + name: customer-orders-service + labels: + app: customer-orders +spec: + selector: + app: customer-orders + ports: + - protocol: TCP + port: 80 + targetPort: 8000 + name: http + type: ClusterIP +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: customer-orders-service-monitor + labels: + release: robusta +spec: + selector: + matchLabels: + app: customer-orders + endpoints: + - port: http + path: /metrics + interval: 15s + namespaceSelector: + matchNames: + - default +--- 
+apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: customer-orders-alert-rules + labels: + release: robusta +spec: + groups: + - name: customerorders.rules + rules: + - alert: HighLatencyForCustomerCheckout + expr: rate(http_request_duration_seconds_sum[24h]) / (rate(http_request_duration_seconds_count[24h])) > 3 + for: 1m + labels: + severity: critical + deployment: customer-orders + namespace: default + annotations: + summary: "HTTP Requests to the '/checkout' endpoint in customer-orders-app are taking longer than 3 seconds" + description: "HTTP Requests to the '/checkout' endpoint in customer-orders-app are taking longer than 3 seconds" diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/requirements.txt b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/requirements.txt new file mode 100644 index 00000000..d44592fb --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/requirements.txt @@ -0,0 +1,5 @@ +fastapi +uvicorn[standard] +sqlalchemy +pymysql +prometheus-fastapi-instrumentator diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/list_available_metrics.txt new file mode 100644 index 00000000..76714d56 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/list_available_metrics.txt @@ -0,0 +1,36 @@ +{"toolset_name":"prometheus/metrics","tool_name":"list_available_metrics","match_params":{"name_filter":"*"}} +Metric | Description | Type | Labels +---------------------------------------------------------------------------------------------------- +alertmanager_http_concurrency_limit_exceeded_total | Total number of times an HTTP request failed because the concurrency limit was reached. 
| counter | container, endpoint, instance, job, method, namespace, pod, service +alertmanager_http_request_duration_seconds | Histogram of latencies for HTTP requests. | histogram | none +alertmanager_http_requests_in_flight | Current number of HTTP requests being processed. | gauge | container, endpoint, instance, job, method, namespace, pod, service +alertmanager_http_response_size_bytes | Histogram of response size for HTTP requests. | histogram | none +go_godebug_non_default_behavior_http2client_events_total | The number of non-default behaviors executed by the net/http package due to a non-default GODEBUG=http2client=... setting. | counter | endpoint, instance, job, metrics_path, namespace, node, service +go_godebug_non_default_behavior_http2server_events_total | The number of non-default behaviors executed by the net/http package due to a non-default GODEBUG=http2server=... setting. | counter | endpoint, instance, job, metrics_path, namespace, node, service +go_godebug_non_default_behavior_httplaxcontentlength_events_total | The number of non-default behaviors executed by the net/http package due to a non-default GODEBUG=httplaxcontentlength=... setting. | counter | endpoint, instance, job, metrics_path, namespace, node, service +go_godebug_non_default_behavior_httpmuxgo121_events_total | The number of non-default behaviors executed by the net/http package due to a non-default GODEBUG=httpmuxgo121=... setting. | counter | endpoint, instance, job, metrics_path, namespace, node, service +http_request_duration_highr_seconds | Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. | histogram | none +http_request_duration_highr_seconds_created | Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. | gauge | container, endpoint, instance, job, namespace, pod, service +http_request_duration_seconds | Latency with only few buckets by handler. 
Made to be only used if aggregation by handler is important. | histogram | none +http_request_duration_seconds_created | Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. | gauge | container, endpoint, handler, instance, job, method, namespace, pod, service +http_request_size_bytes | Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. | summary | none +http_request_size_bytes_created | Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. | gauge | container, endpoint, handler, instance, job, namespace, pod, service +http_requests_created | Total number of requests by method, status and handler. | gauge | container, endpoint, handler, instance, job, method, namespace, pod, service, status +http_requests_total | Total number of requests by method, status and handler. | counter | container, endpoint, handler, instance, job, method, namespace, pod, service, status +http_response_size_bytes | Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. | summary | none +http_response_size_bytes_created | Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
| gauge | container, endpoint, handler, instance, job, namespace, pod, service +kubelet_http_inflight_requests | [ALPHA] Number of the inflight http requests | gauge | endpoint, instance, job, long_running, method, metrics_path, namespace, node, path, server_type, service +kubelet_http_requests_duration_seconds | [ALPHA] Duration in seconds to serve http requests | histogram | none +kubelet_http_requests_total | [ALPHA] Number of the http requests received since the server started | counter | endpoint, instance, job, long_running, method, metrics_path, namespace, node, path, server_type, service +kubelet_lifecycle_handler_http_fallbacks_total | [ALPHA] The number of times lifecycle handlers successfully fell back to http from https. | counter | endpoint, instance, job, metrics_path, namespace, node, service +prometheus_http_request_duration_seconds | Histogram of latencies for HTTP requests. | histogram | none +prometheus_http_requests_total | Counter of HTTP requests. | counter | code, container, endpoint, handler, instance, job, namespace, pod, service +prometheus_http_response_size_bytes | Histogram of response size for HTTP requests. | histogram | none +prometheus_operator_kubernetes_client_http_request_duration_seconds | Summary of latencies for the Kubernetes client's requests by endpoint. | summary | none +prometheus_operator_kubernetes_client_http_requests_total | Total number of Kubernetes's client requests by status code. | counter | container, endpoint, instance, job, namespace, pod, service, status_code +prometheus_sd_http_failures_total | Number of HTTP service discovery refresh failures. | counter | container, endpoint, instance, job, namespace, pod, service +prometheus_sd_kubernetes_http_request_duration_seconds | Summary of latencies for HTTP requests to the Kubernetes API by endpoint. | summary | none +prometheus_sd_kubernetes_http_request_total | Total number of HTTP requests to the Kubernetes API by status code. 
| counter | container, endpoint, instance, job, namespace, pod, service, status_code +promhttp_metric_handler_errors_total | Total number of internal errors encountered by the promhttp metric handler. | counter | cause, container, endpoint, instance, job, namespace, pod, service +promhttp_metric_handler_requests_in_flight | Current number of scrapes being served. | gauge | container, endpoint, instance, job, namespace, pod, service +promhttp_metric_handler_requests_total | Total number of scrapes by HTTP status code. | counter | code, container, endpoint, instance, job, namespace, pod, service diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/test_case.yaml new file mode 100644 index 00000000..a3739334 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/test_case.yaml @@ -0,0 +1,8 @@ +user_prompt: "How is the http request latency for the service customer-orders-service?" +expected_output: + - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "" } >>.' + - "The root endpoint `/` latency ranges (very roughly) from 5+ to 8+ seconds." 
+before_test: kubectl apply -f ./helm/manifest.yaml +after_test: kubectl delete -f ./helm/manifest.yaml +evaluation: + correctness: 1 diff --git a/tests/llm/test_ask_holmes.py b/tests/llm/test_ask_holmes.py index 6acdaf14..2d12400e 100644 --- a/tests/llm/test_ask_holmes.py +++ b/tests/llm/test_ask_holmes.py @@ -58,11 +58,14 @@ def idfn(val): ) @pytest.mark.parametrize("experiment_name, test_case", get_test_cases(), ids=idfn) def test_ask_holmes(experiment_name, test_case): - bt_helper = braintrust_util.BraintrustEvalHelper( - project_name=PROJECT, dataset_name=DATASET_NAME - ) + bt_helper = None + eval = None + if braintrust_util.PUSH_EVALS_TO_BRAINTRUST: + bt_helper = braintrust_util.BraintrustEvalHelper( + project_name=PROJECT, dataset_name=DATASET_NAME + ) - eval = bt_helper.start_evaluation(experiment_name, name=test_case.id) + eval = bt_helper.start_evaluation(experiment_name, name=test_case.id) try: before_test(test_case) @@ -97,14 +100,15 @@ def test_ask_holmes(experiment_name, test_case): output=output, context_items=test_case.retrieval_context, input=input ).score - bt_helper.end_evaluation( - eval=eval, - input=input, - output=output or "", - expected=str(expected), - id=test_case.id, - scores=scores, - ) + if bt_helper and eval: + bt_helper.end_evaluation( + eval=eval, + input=input, + output=output or "", + expected=str(expected), + id=test_case.id, + scores=scores, + ) print(f"\n** OUTPUT **\n{output}") print(f"\n** SCORES **\n{scores}") @@ -124,6 +128,7 @@ def ask_holmes(test_case: AskHolmesTestCase) -> LLMResult: expected_tools.append(tool_mock.tool_name) tool_executor = ToolExecutor(mock.mocked_toolsets) + ai = ToolCallingLLM( tool_executor=tool_executor, max_steps=10, diff --git a/tests/llm/test_investigate.py b/tests/llm/test_investigate.py index 8aa51add..95228a29 100644 --- a/tests/llm/test_investigate.py +++ b/tests/llm/test_investigate.py @@ -97,9 +97,13 @@ def test_investigate(experiment_name, test_case): metadata = get_machine_state_tags() 
metadata["model"] = config.model or "Unknown" - # bt_helper = braintrust_util.BraintrustEvalHelper(project_name=PROJECT, dataset_name=DATASET_NAME) - - # eval = bt_helper.start_evaluation(experiment_name, name=test_case.id) + bt_helper = None + eval = None + if braintrust_util.PUSH_EVALS_TO_BRAINTRUST: + bt_helper = braintrust_util.BraintrustEvalHelper( + project_name=PROJECT, dataset_name=DATASET_NAME + ) + eval = bt_helper.start_evaluation(experiment_name, name=test_case.id) investigate_request = test_case.investigate_request investigate_request.sections = DEFAULT_SECTIONS @@ -128,14 +132,15 @@ def test_investigate(experiment_name, test_case): input=input, output=output, context_items=test_case.retrieval_context ).score - # bt_helper.end_evaluation( - # eval=eval, - # input=input, - # output=output or "", - # expected=str(expected), - # id=test_case.id, - # scores=scores - # ) + if bt_helper and eval: + bt_helper.end_evaluation( + eval=eval, + input=input, + output=output or "", + expected=str(expected), + id=test_case.id, + scores=scores, + ) print(f"\n** OUTPUT **\n{output}") print(f"\n** SCORES **\n{scores}") diff --git a/tests/llm/utils/braintrust.py b/tests/llm/utils/braintrust.py index ddca2702..cae8530b 100644 --- a/tests/llm/utils/braintrust.py +++ b/tests/llm/utils/braintrust.py @@ -3,6 +3,7 @@ import logging from typing import Any, List, Optional +from holmes.common.env_vars import load_bool from tests.llm.utils.mock_utils import HolmesTestCase from tests.llm.utils.system import get_machine_state_tags @@ -35,6 +36,9 @@ def pop_matching_test_case_if_exists( return pop_test_case(test_cases, test_case_id) +PUSH_EVALS_TO_BRAINTRUST = load_bool("PUSH_EVALS_TO_BRAINTRUST", False) + + class BraintrustEvalHelper: def __init__(self, project_name: str, dataset_name: str) -> None: self.project_name = project_name diff --git a/tests/llm/utils/mock_toolset.py b/tests/llm/utils/mock_toolset.py index 62f60839..217a082e 100644 --- a/tests/llm/utils/mock_toolset.py +++ 
b/tests/llm/utils/mock_toolset.py @@ -94,8 +94,8 @@ def find_matching_mock(self, params: Dict) -> Optional[ToolMock]: return mock match = all( - key in params and params[key] == val - for key, val in mock.match_params.items() + key in params and params[key] == mock_val or mock_val == "*" + for key, mock_val in mock.match_params.items() ) if match: return mock @@ -120,6 +120,10 @@ class MockToolsets: def __init__(self, test_case_folder: str, generate_mocks: bool = True) -> None: self.unmocked_toolsets = load_builtin_toolsets() + + for toolset in self.unmocked_toolsets: + toolset.check_prerequisites() + self.generate_mocks = generate_mocks self.test_case_folder = test_case_folder self._mocks = [] diff --git a/tests/test_prometheus.py b/tests/test_prometheus.py new file mode 100644 index 00000000..195e45de --- /dev/null +++ b/tests/test_prometheus.py @@ -0,0 +1,136 @@ +import json +import os + +import pytest +from holmes.core.tools import ToolExecutor +from holmes.plugins.toolsets.prometheus import ( + PrometheusConfig, + PrometheusToolset, + filter_metrics_by_name, + filter_metrics_by_type, +) + +pytestmark = pytest.mark.skipif( + os.environ.get("PROMETHEUS_URL", None) is None, reason="PROMETHEUS_URL must be set" +) + +PROMETHEUS_URL = os.environ.get("PROMETHEUS_URL", None) + +toolset = PrometheusToolset() +toolset.config = PrometheusConfig(prometheus_url=PROMETHEUS_URL) +tool_executor = ToolExecutor(toolsets=[toolset]) + + +def test_list_available_metrics(): + tool = tool_executor.get_tool_by_name("list_available_metrics") + assert tool + actual_output = tool.invoke({}) + print(actual_output) + assert "kubelet_running_pods" in actual_output + assert False + + +def test_execute_prometheus_query(): + tool = tool_executor.get_tool_by_name("execute_prometheus_query") + assert tool + actual_output = tool.invoke({"query": "up", "type": "query"}) + print(actual_output) + assert actual_output + parsed_output = json.loads(actual_output) + assert parsed_output.get("status") 
== "success" + + +@pytest.mark.parametrize( + "metrics, expected_type, expected_result", + [ + ( + { + "metric1": {"type": "counter"}, + "metric2": {"type": "gauge"}, + "metric3": {"type": "counter"}, + }, + "counter", + {"metric1": {"type": "counter"}, "metric3": {"type": "counter"}}, + ), + # Test case 2: Empty result when type doesn't exist + ( + {"metric1": {"type": "counter"}, "metric2": {"type": "gauge"}}, + "histogram", + {}, + ), + # Test case 3: Empty input dictionary + ({}, "counter", {}), + # Test case 4: Metrics with missing type field + ( + { + "metric1": {"type": "counter"}, + "metric2": {}, + "metric3": {"type": "counter"}, + }, + "counter", + {"metric1": {"type": "counter"}, "metric3": {"type": "counter"}}, + ), + ], +) +def test_filter_metrics_by_type(metrics, expected_type, expected_result): + result = filter_metrics_by_type(metrics, expected_type) + assert result == expected_result + + +@pytest.mark.parametrize( + "metrics, pattern, expected", + [ + ( + { + "node_memory_Active_bytes": {"type": "gauge"}, + "node_cpu_seconds_total": {"type": "counter"}, + "process_start_time": {"type": "gauge"}, + }, + "node_.*", # Pattern to match metrics starting with "node_" + { + "node_memory_Active_bytes": {"type": "gauge"}, + "node_cpu_seconds_total": {"type": "counter"}, + }, + ), + ( + { + "node_memory_Active_bytes": {"type": "gauge"}, + "node_memory_Cached_bytes": {"type": "gauge"}, + "process_cpu_seconds": {"type": "counter"}, + }, + "memory", + { + "node_memory_Active_bytes": {"type": "gauge"}, + "node_memory_Cached_bytes": {"type": "gauge"}, + }, + ), + ( + { + "node_memory_Active_bytes": {"type": "gauge"}, + "node_memory_Cached_bytes": {"type": "gauge"}, + "process_cpu_seconds": {"type": "counter"}, + }, + ".*memory.*", # Pattern to match metrics containing "memory" + { + "node_memory_Active_bytes": {"type": "gauge"}, + "node_memory_Cached_bytes": {"type": "gauge"}, + }, + ), + ( + { + "node_memory_Active_bytes": {"type": "gauge"}, + 
"process_cpu_seconds": {"type": "counter"}, + }, + "nonexistent.*", # Pattern that matches nothing + {}, + ), + ( + {}, + ".*", # Pattern that matches everything, but empty input + {}, + ), + ], +) +def test_filter_metrics_by_name(metrics, pattern, expected): + result = filter_metrics_by_name(metrics, pattern) + assert result == expected From 6b42ea534c8851f32a4992fe86cf1e08c0b75e62 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Thu, 20 Feb 2025 07:39:58 +0100 Subject: [PATCH 02/14] feat: fix prometheus tests --- holmes/plugins/toolsets/datetime.py | 1 + tests/test_prometheus.py | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/holmes/plugins/toolsets/datetime.py b/holmes/plugins/toolsets/datetime.py index 6f37623c..ee12d0ee 100644 --- a/holmes/plugins/toolsets/datetime.py +++ b/holmes/plugins/toolsets/datetime.py @@ -30,4 +30,5 @@ def __init__(self): prerequisites=[], tools=[CurrentTime()], tags=[ToolsetTag.CORE], + is_default=True ) diff --git a/tests/test_prometheus.py b/tests/test_prometheus.py index 195e45de..88fdeed2 100644 --- a/tests/test_prometheus.py +++ b/tests/test_prometheus.py @@ -2,7 +2,7 @@ import os import pytest -from holmes.core.tools import ToolExecutor +from holmes.core.tools import CallablePrerequisite, ToolExecutor, ToolsetStatusEnum from holmes.plugins.toolsets.prometheus import ( PrometheusConfig, PrometheusToolset, @@ -16,22 +16,27 @@ PROMETHEUS_URL = os.environ.get("PROMETHEUS_URL", None) -toolset = PrometheusToolset() -toolset.config = PrometheusConfig(prometheus_url=PROMETHEUS_URL) -tool_executor = ToolExecutor(toolsets=[toolset]) +@pytest.fixture +def tool_executor(): + toolset = PrometheusToolset() + toolset.enabled = True + toolset.config = {"prometheus_url": PROMETHEUS_URL} + toolset.check_prerequisites() + assert toolset.get_status() == ToolsetStatusEnum.ENABLED + tool_executor = ToolExecutor(toolsets=[toolset]) + return tool_executor -def test_list_available_metrics(): +def 
test_list_available_metrics(tool_executor:ToolExecutor): tool = tool_executor.get_tool_by_name("list_available_metrics") assert tool - actual_output = tool.invoke({}) + actual_output = tool.invoke({"name_filter": "kubelet_running_pods"}) print(actual_output) assert "kubelet_running_pods" in actual_output - assert False -def test_execute_prometheus_query(): - tool = tool_executor.get_tool_by_name("execute_prometheus_query") +def test_execute_prometheus_query(tool_executor:ToolExecutor): + tool = tool_executor.get_tool_by_name("execute_prometheus_instant_query") assert tool actual_output = tool.invoke({"query": "up", "type": "query"}) print(actual_output) From 3a6aa3b4e2c504236e21242d2cca1eebdcfcc5b1 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Thu, 20 Feb 2025 07:42:48 +0100 Subject: [PATCH 03/14] test: split prometheus unit and integration tests --- holmes/plugins/toolsets/datetime.py | 2 +- tests/test_prometheus_integration.py | 45 +++++++++++++++++++ ..._prometheus.py => test_prometheus_unit.py} | 40 ----------------- 3 files changed, 46 insertions(+), 41 deletions(-) create mode 100644 tests/test_prometheus_integration.py rename tests/{test_prometheus.py => test_prometheus_unit.py} (70%) diff --git a/holmes/plugins/toolsets/datetime.py b/holmes/plugins/toolsets/datetime.py index ee12d0ee..48c4a39c 100644 --- a/holmes/plugins/toolsets/datetime.py +++ b/holmes/plugins/toolsets/datetime.py @@ -30,5 +30,5 @@ def __init__(self): prerequisites=[], tools=[CurrentTime()], tags=[ToolsetTag.CORE], - is_default=True + is_default=True, ) diff --git a/tests/test_prometheus_integration.py b/tests/test_prometheus_integration.py new file mode 100644 index 00000000..ff3150a0 --- /dev/null +++ b/tests/test_prometheus_integration.py @@ -0,0 +1,45 @@ +import json +import os + +import pytest +from holmes.core.tools import CallablePrerequisite, ToolExecutor, ToolsetStatusEnum +from holmes.plugins.toolsets.prometheus import ( + PrometheusConfig, + PrometheusToolset, + 
filter_metrics_by_name, + filter_metrics_by_type, +) + +pytestmark = pytest.mark.skipif( + os.environ.get("PROMETHEUS_URL", None) is None, reason="PROMETHEUS_URL must be set" +) + +PROMETHEUS_URL = os.environ.get("PROMETHEUS_URL", None) + +@pytest.fixture +def tool_executor(): + toolset = PrometheusToolset() + toolset.enabled = True + toolset.config = {"prometheus_url": PROMETHEUS_URL} + toolset.check_prerequisites() + assert toolset.get_status() == ToolsetStatusEnum.ENABLED + tool_executor = ToolExecutor(toolsets=[toolset]) + return tool_executor + + +def test_list_available_metrics(tool_executor:ToolExecutor): + tool = tool_executor.get_tool_by_name("list_available_metrics") + assert tool + actual_output = tool.invoke({"name_filter": "kubelet_running_pods"}) + print(actual_output) + assert "kubelet_running_pods" in actual_output + + +def test_execute_prometheus_query(tool_executor:ToolExecutor): + tool = tool_executor.get_tool_by_name("execute_prometheus_instant_query") + assert tool + actual_output = tool.invoke({"query": "up", "type": "query"}) + print(actual_output) + assert actual_output + parsed_output = json.loads(actual_output) + assert parsed_output.get("status") == "success" diff --git a/tests/test_prometheus.py b/tests/test_prometheus_unit.py similarity index 70% rename from tests/test_prometheus.py rename to tests/test_prometheus_unit.py index 88fdeed2..466249ce 100644 --- a/tests/test_prometheus.py +++ b/tests/test_prometheus_unit.py @@ -1,49 +1,9 @@ -import json -import os - import pytest -from holmes.core.tools import CallablePrerequisite, ToolExecutor, ToolsetStatusEnum from holmes.plugins.toolsets.prometheus import ( - PrometheusConfig, - PrometheusToolset, filter_metrics_by_name, filter_metrics_by_type, ) -pytestmark = pytest.mark.skipif( - os.environ.get("PROMETHEUS_URL", None) is None, reason="PROMETHEUS_URL must be set" -) - -PROMETHEUS_URL = os.environ.get("PROMETHEUS_URL", None) - -@pytest.fixture -def tool_executor(): - toolset = 
PrometheusToolset() - toolset.enabled = True - toolset.config = {"prometheus_url": PROMETHEUS_URL} - toolset.check_prerequisites() - assert toolset.get_status() == ToolsetStatusEnum.ENABLED - tool_executor = ToolExecutor(toolsets=[toolset]) - return tool_executor - - -def test_list_available_metrics(tool_executor:ToolExecutor): - tool = tool_executor.get_tool_by_name("list_available_metrics") - assert tool - actual_output = tool.invoke({"name_filter": "kubelet_running_pods"}) - print(actual_output) - assert "kubelet_running_pods" in actual_output - - -def test_execute_prometheus_query(tool_executor:ToolExecutor): - tool = tool_executor.get_tool_by_name("execute_prometheus_instant_query") - assert tool - actual_output = tool.invoke({"query": "up", "type": "query"}) - print(actual_output) - assert actual_output - parsed_output = json.loads(actual_output) - assert parsed_output.get("status") == "success" - @pytest.mark.parametrize( "metrics, expected_type, expected_result", From b11e3e1f4b1d96eded9196af7b0f3577a076bb01 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Thu, 20 Feb 2025 07:52:38 +0100 Subject: [PATCH 04/14] chore: ruff --- tests/test_prometheus_integration.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_prometheus_integration.py b/tests/test_prometheus_integration.py index ff3150a0..c52f671f 100644 --- a/tests/test_prometheus_integration.py +++ b/tests/test_prometheus_integration.py @@ -2,12 +2,9 @@ import os import pytest -from holmes.core.tools import CallablePrerequisite, ToolExecutor, ToolsetStatusEnum +from holmes.core.tools import ToolExecutor, ToolsetStatusEnum from holmes.plugins.toolsets.prometheus import ( - PrometheusConfig, PrometheusToolset, - filter_metrics_by_name, - filter_metrics_by_type, ) pytestmark = pytest.mark.skipif( @@ -16,6 +13,7 @@ PROMETHEUS_URL = os.environ.get("PROMETHEUS_URL", None) + @pytest.fixture def tool_executor(): toolset = PrometheusToolset() @@ -27,7 +25,7 @@ def 
tool_executor(): return tool_executor -def test_list_available_metrics(tool_executor:ToolExecutor): +def test_list_available_metrics(tool_executor: ToolExecutor): tool = tool_executor.get_tool_by_name("list_available_metrics") assert tool actual_output = tool.invoke({"name_filter": "kubelet_running_pods"}) @@ -35,7 +33,7 @@ def test_list_available_metrics(tool_executor:ToolExecutor): assert "kubelet_running_pods" in actual_output -def test_execute_prometheus_query(tool_executor:ToolExecutor): +def test_execute_prometheus_query(tool_executor: ToolExecutor): tool = tool_executor.get_tool_by_name("execute_prometheus_instant_query") assert tool actual_output = tool.invoke({"query": "up", "type": "query"}) From 260238815398a1f42a103a8b8286e411276458ed Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Fri, 21 Feb 2025 09:38:04 +0100 Subject: [PATCH 05/14] feat: prometheus toolset no longer returns query result butv alidate results are present --- .../prompts/generic_ask_conversation.jinja2 | 12 +- holmes/plugins/toolsets/prometheus.py | 82 +- holmes/utils/cache.py | 2 +- .../execute_prometheus_range_query.txt | 1260 ----------------- .../execute_prometheus_range_query.txt | 790 ----------- .../test_case.yaml | 1 + .../execute_prometheus_range_query.txt | 256 ---- ..._prometheus_range_query_with_namespace.txt | 256 ---- .../execute_prometheus_range_query.txt | 125 -- .../execute_prometheus_range_query_by_pod.txt | 127 -- ..._prometheus_range_query_with_namespace.txt | 125 -- ...heus_range_query_with_namespace_by_pod.txt | 267 ---- .../get_current_time.txt | 2 - .../kubectl_find_resource.txt | 5 - .../kubectl_get_by_name.txt | 6 - .../kubectl_lineage_children.txt | 23 - .../kubectl_top_pods.txt | 6 - .../list_available_metrics.txt | 4 - .../slow_oom_deployment.yaml | 32 - .../test_case.yaml | 13 - .../execute_prometheus_range_query.txt | 231 --- .../get_current_time.txt | 2 - .../33_cpu_throttling_graph/helm/Dockerfile | 19 - .../33_cpu_throttling_graph/helm/app.py | 54 
- .../33_cpu_throttling_graph/helm/build.sh | 1 - .../helm/manifest.yaml | 100 -- .../helm/requirements.txt | 5 - .../kubectl_find_resource.txt | 5 - .../kubectl_top_pods.txt | 6 - .../list_available_metrics.txt | 4 - .../33_cpu_throttling_graph/test_case.yaml | 7 - tests/test_prometheus_integration.py | 89 +- 32 files changed, 156 insertions(+), 3761 deletions(-) delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_by_pod.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace_by_pod.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/get_current_time.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_find_resource.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_get_by_name.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_lineage_children.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_top_pods.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/list_available_metrics.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/slow_oom_deployment.yaml delete mode 100644 tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/test_case.yaml delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/execute_prometheus_range_query.txt delete mode 100644 
tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/get_current_time.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/Dockerfile delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/app.py delete mode 100755 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/build.sh delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/manifest.yaml delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/requirements.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_find_resource.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_top_pods.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/list_available_metrics.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/test_case.yaml diff --git a/holmes/plugins/prompts/generic_ask_conversation.jinja2 b/holmes/plugins/prompts/generic_ask_conversation.jinja2 index bc5c6425..d404ff23 100644 --- a/holmes/plugins/prompts/generic_ask_conversation.jinja2 +++ b/holmes/plugins/prompts/generic_ask_conversation.jinja2 @@ -10,7 +10,9 @@ Use conversation history to maintain continuity when appropriate, ensuring effic Prometheus/PromQL queries * Use prometheus to execute promql queries with the tools `execute_prometheus_instant_query` and `execute_prometheus_range_query` -* Use these tools to retroactively check metrics. Here are standard metrics but you can use different ones: +* ALWAYS embed the execution results into your answer +* You only need to embed the partial result in your response. Include the `tool_name` and `random_key`. For example: << {tool_name: "execute_prometheus_query", random_key: "92jf2hf"} >> +* Use these tools to generate charts that users can see. 
Here are standard metrics but you can use different ones: ** For memory consumption: `container_memory_working_set_bytes` ** For CPU usage: `container_cpu_usage_seconds_total` ** For CPU throttling: `container_cpu_cfs_throttled_periods_total` @@ -18,10 +20,10 @@ Prometheus/PromQL queries ** Avoid using `_bucket` unless you know the bucket's boundaries are configured correctly ** Prefer individual averages like `rate(_sum) / rate(_count)` ** Avoid global averages like `sum(rate(_sum)) / sum(rate(_count))` because it hides data and is not generally informative -* ALWAYS embed the execution results into your answer if you use the data for your answer -* You only need to embed the partial result in your response. Include the `tool_name` and `random_key`. For example: << {tool_name: "execute_prometheus_query", random_key: "92jf2hf"} >> -* Post processing will parse your response, retrieve the tool output and create a chart -* Only generate and execute a prometheus query after checking what metrics are available with the `list_available_metrics` tool. Filter as needed +* Post processing will parse your response, re-run the query from the tool output and create a chart +* Only generate and execute a prometheus query after checking what metrics are available with the `list_available_metrics` tool. Filter as needed. +* Check that any node, service, pod, container, app, namespace, etc. mentioned in the query exist in the kubernetes cluster before making a query. Use any appropriate kubectl tool(s) for this. +* The toolcall will return no data to you. That is expected. 
Only the user will see the charts * You MUST get the current time before executing a prometheus range query Style guide: diff --git a/holmes/plugins/toolsets/prometheus.py b/holmes/plugins/toolsets/prometheus.py index d0ef549b..e8a3d6c7 100644 --- a/holmes/plugins/toolsets/prometheus.py +++ b/holmes/plugins/toolsets/prometheus.py @@ -69,20 +69,31 @@ def fetch_metadata(url: str) -> dict: return metadata +def result_has_data(result: dict) -> bool: + data = result.get("data", {}) + if data.get("resultType", None) == "vector" and len(data.get("result", [])) > 0: + return True + return False + + def fetch_metrics_labels( prometheus_url: str, cache: Optional[TTLCache], metrics_labels_time_window_hrs: Union[int, None], + metric_name: str, ) -> dict: """This is a slow query. Takes 5+ seconds to run""" - + cache_key = f"metrics_labels:{metric_name}" if cache: - cached_result = cache.get("metrics_labels") + cached_result = cache.get(cache_key) if cached_result: logging.info("fetch_metrics_labels() result retrieved from cache") return cached_result series_url = urljoin(prometheus_url, "/api/v1/series") + # params: dict = { + # "match[]": f'{{__name__=~".*{metric_name}.*"}}', + # } params: dict = { "match[]": '{__name__!=""}', } @@ -108,7 +119,8 @@ def fetch_metrics_labels( else: metrics_labels[metric_name] = labels if cache: - cache.set("metrics_labels", metrics_labels) + cache.set(cache_key, metrics_labels) + return metrics_labels @@ -116,9 +128,15 @@ def fetch_metrics( url: str, cache: Optional[TTLCache], metrics_labels_time_window_hrs: Union[int, None], + metric_name: str, ) -> dict: metadata = fetch_metadata(url) - metrics_labels = fetch_metrics_labels(url, cache, metrics_labels_time_window_hrs) + metrics_labels = fetch_metrics_labels( + prometheus_url=url, + cache=cache, + metrics_labels_time_window_hrs=metrics_labels_time_window_hrs, + metric_name=metric_name, + ) metrics = {} for metric_name, meta_list in metadata.items(): @@ -177,8 +195,9 @@ def invoke(self, params: 
Any) -> str: name_filter = params.get("name_filter") if not name_filter: return "Error: cannot run tool 'list_available_metrics'. The param 'name_filter' is required but is missing." + metrics = fetch_metrics( - prometheus_url, self._cache, metrics_labels_time_window_hrs + prometheus_url, self._cache, metrics_labels_time_window_hrs, name_filter ) metrics = filter_metrics_by_name(metrics, name_filter) @@ -214,7 +233,7 @@ def get_parameterized_one_liner(self, params) -> str: return f'list available prometheus metrics: name_filter="{params.get("name_filter", "")}", type_filter="{params.get("type_filter", "")}"' -class ExecuteQuery(BasePrometheusTool): +class ExecuteInstantQuery(BasePrometheusTool): def __init__(self, toolset: "PrometheusToolset"): super().__init__( name="execute_prometheus_instant_query", @@ -249,11 +268,22 @@ def invoke(self, params: Any) -> str: if response.status_code == 200: data = response.json() - data["random_key"] = generate_random_key() - data["tool_name"] = self.name - data["description"] = description - data["query"] = query - data_str = json.dumps(data, indent=2) + status = data.get("status") + error_message = None + if status == "success" and not result_has_data(data): + status = "Failed" + error_message = ( + "The prometheus query returned no result. Is the query correct?" 
+ ) + response_data = { + "status": status, + "error_message": error_message, + "random_key": generate_random_key(), + "tool_name": self.name, + "description": description, + "query": query, + } + data_str = json.dumps(response_data, indent=2) return data_str # Handle known Prometheus error status codes @@ -345,15 +375,25 @@ def invoke(self, params: Any) -> str: if response.status_code == 200: data = response.json() - - data["random_key"] = generate_random_key() - data["tool_name"] = self.name - data["description"] = description - data["query"] = query - data["start"] = start - data["end"] = end - data["step"] = step - data_str = json.dumps(data, indent=2) + status = data.get("status") + error_message = None + if status == "success" and not result_has_data(data): + status = "Failed" + error_message = ( + "The prometheus query returned no result. Is the query correct?" + ) + response_data = { + "status": status, + "error_message": error_message, + "random_key": generate_random_key(), + "tool_name": self.name, + "description": description, + "query": query, + "start": start, + "end": end, + "step": step, + } + data_str = json.dumps(response_data, indent=2) return data_str error_msg = "Unknown error occurred" @@ -397,7 +437,7 @@ def __init__(self): prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)], tools=[ ListAvailableMetrics(toolset=self), - ExecuteQuery(toolset=self), + ExecuteInstantQuery(toolset=self), ExecuteRangeQuery(toolset=self), ], tags=[ diff --git a/holmes/utils/cache.py b/holmes/utils/cache.py index 3554e13c..8d59d4e9 100644 --- a/holmes/utils/cache.py +++ b/holmes/utils/cache.py @@ -15,7 +15,7 @@ def default(self, o): def compress(data): json_str = json.dumps(data, cls=SetEncoder) json_bytes = json_str.encode("utf-8") - compressed = bz2.compress(json_bytes) + compressed = bz2.compress(json_bytes, compresslevel=1) return compressed diff --git 
a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt index 7bab3f26..c06213ba 100644 --- a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt +++ b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt @@ -1,1266 +1,6 @@ {"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"*","description":"*","start":"*","end":"*","step":"*"}} { "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "namespace": "argocd" - }, - "values": [ - [ - 1738162001, - "782409728" - ], - [ - 1738162031, - "782692352" - ], - [ - 1738162061, - "782700544" - ], - [ - 1738162091, - "782426112" - ], - [ - 1738162121, - "782155776" - ], - [ - 1738162151, - "782401536" - ], - [ - 1738162181, - "782143488" - ], - [ - 1738162211, - "782139392" - ], - [ - 1738162241, - "782143488" - ], - [ - 1738162271, - "782168064" - ], - [ - 1738162301, - "782426112" - ], - [ - 1738162331, - "782430208" - ], - [ - 1738162361, - "782688256" - ], - [ - 1738162391, - "782712832" - ], - [ - 1738162421, - "782159872" - ], - [ - 1738162451, - "782434304" - ], - [ - 1738162481, - "782422016" - ], - [ - 1738162511, - "782172160" - ], - [ - 1738162541, - "782163968" - ], - [ - 1738162571, - "782430208" - ], - [ - 1738162601, - "782151680" - ], - [ - 1738162631, - "782422016" - ], - [ - 1738162661, - "782434304" - ], - [ - 1738162691, - "782409728" - ], - [ - 1738162721, - "782143488" - ], - [ - 1738162751, - "782442496" - ], - [ - 1738162781, - "782213120" - ], - [ - 1738162811, - "782434304" - ], - [ - 1738162841, - "782446592" - ], - [ - 1738162871, - "782172160" - ], - [ - 1738162901, - "782458880" - ], - [ - 1738162931, - "782692352" - ], - [ - 1738162961, - 
"782688256" - ], - [ - 1738162991, - "782184448" - ], - [ - 1738163021, - "782446592" - ], - [ - 1738163051, - "782696448" - ], - [ - 1738163081, - "782430208" - ], - [ - 1738163111, - "781275136" - ], - [ - 1738163141, - "780996608" - ], - [ - 1738163171, - "780992512" - ], - [ - 1738163201, - "780726272" - ], - [ - 1738163231, - "780750848" - ], - [ - 1738163261, - "780742656" - ], - [ - 1738163291, - "781262848" - ], - [ - 1738163321, - "780750848" - ], - [ - 1738163351, - "780750848" - ], - [ - 1738163381, - "781017088" - ], - [ - 1738163411, - "781004800" - ], - [ - 1738163441, - "781004800" - ], - [ - 1738163471, - "781037568" - ], - [ - 1738163501, - "781029376" - ], - [ - 1738163531, - "780767232" - ], - [ - 1738163561, - "780759040" - ], - [ - 1738163591, - "781037568" - ], - [ - 1738163621, - "781045760" - ], - [ - 1738163651, - "781570048" - ], - [ - 1738163681, - "781041664" - ], - [ - 1738163711, - "781058048" - ], - [ - 1738163741, - "781021184" - ], - [ - 1738163771, - "781033472" - ], - [ - 1738163801, - "780759040" - ] - ] - }, - { - "metric": { - "namespace": "default" - }, - "values": [ - [ - 1738162001, - "14914916352" - ], - [ - 1738162031, - "14920581120" - ], - [kind-grafana-cloud - 1738162061, - "14959681536" - ], - [ - 1738162091, - "14967799808" - ], - [ - 1738162121, - "14976086016" - ], - [ - 1738162151, - "14969462784" - ], - [ - 1738162181, - "14942916608" - ], - [ - 1738162211, - "14932123648" - ], - [ - 1738162241, - "14990041088" - ], - [ - 1738162271, - "15017783296" - ], - [ - 1738162301, - "14981828608" - ], - [ - 1738162331, - "15013658624" - ], - [ - 1738162361, - "15031767040" - ], - [ - 1738162391, - "15024631808" - ], - [ - 1738162421, - "15031906304" - ], - [ - 1738162451, - "15011442688" - ], - [ - 1738162481, - "15015374848" - ], - [ - 1738162511, - "15028559872" - ], - [ - 1738162541, - "15039492096" - ], - [ - 1738162571, - "14992990208" - ], - [ - 1738162601, - "14985097216" - ], - [ - 1738162631, - "14942883840" - ], 
- [ - 1738162661, - "15208288256" - ], - [ - 1738162691, - "15175557120" - ], - [ - 1738162721, - "15177445376" - ], - [ - 1738162751, - "15179718656" - ], - [ - 1738162781, - "16764833792" - ], - [ - 1738162811, - "16750112768" - ], - [ - 1738162841, - "16324071424" - ], - [ - 1738162871, - "17035313152" - ], - [ - 1738162901, - "16249901056" - ], - [ - 1738162931, - "15430139904" - ], - [ - 1738162961, - "15603331072" - ], - [ - 1738162991, - "15855284224" - ], - [ - 1738163021, - "15034163200" - ], - [ - 1738163051, - "14576488448" - ], - [ - 1738163081, - "14605860864" - ], - [ - 1738163111, - "14632443904" - ], - [ - 1738163141, - "14638026752" - ], - [ - 1738163171, - "15656050688" - ], - [ - 1738163201, - "15687675904" - ], - [ - 1738163231, - "15727095808" - ], - [ - 1738163261, - "15859519488" - ], - [ - 1738163291, - "15273623552" - ], - [ - 1738163321, - "15244197888" - ], - [ - 1738163351, - "14565888000" - ], - [ - 1738163381, - "15233122304" - ], - [ - 1738163411, - "15021203456" - ], - [ - 1738163441, - "14451933184" - ], - [ - 1738163471, - "14449156096" - ], - [ - 1738163501, - "14472785920" - ], - [ - 1738163531, - "14520791040" - ], - [ - 1738163561, - "14470537216" - ], - [ - 1738163591, - "14452244480" - ], - [ - 1738163621, - "13654368256" - ], - [ - 1738163651, - "13661114368" - ], - [ - 1738163681, - "13604032512" - ], - [ - 1738163711, - "13678948352" - ], - [ - 1738163741, - "13712158720" - ], - [ - 1738163771, - "13720158208" - ], - [ - 1738163801, - "13677768704" - ] - ] - }, - { - "metric": { - "namespace": "kube-system" - }, - "values": [ - [ - 1738162001, - "3344011264" - ], - [ - 1738162031, - "3336151040" - ], - [ - 1738162061, - "3334979584" - ], - [ - 1738162091, - "3334868992" - ], - [ - 1738162121, - "3349721088" - ], - [ - 1738162151, - "3338203136" - ], - [ - 1738162181, - "3339788288" - ], - [ - 1738162211, - "3353309184" - ], - [ - 1738162241, - "3352088576" - ], - [ - 1738162271, - "3349053440" - ], - [ - 1738162301, - 
"3362312192" - ], - [ - 1738162331, - "3372867584" - ], - [ - 1738162361, - "3374501888" - ], - [ - 1738162391, - "3381321728" - ], - [ - 1738162421, - "3297435648" - ], - [ - 1738162451, - "3297333248" - ], - [ - 1738162481, - "3319226368" - ], - [ - 1738162511, - "3325181952" - ], - [ - 1738162541, - "3301502976" - ], - [ - 1738162571, - "3415863296" - ], - [ - 1738162601, - "3416260608" - ], - [ - 1738162631, - "3415916544" - ], - [ - 1738162661, - "3383025664" - ], - [ - 1738162691, - "3383173120" - ], - [ - 1738162721, - "3384684544" - ], - [ - 1738162751, - "3391975424" - ], - [ - 1738162781, - "3342675968" - ], - [ - 1738162811, - "3345620992" - ], - [ - 1738162841, - "3366748160" - ], - [ - 1738162871, - "3367297024" - ], - [ - 1738162901, - "3363966976" - ], - [ - 1738162931, - "3370590208" - ], - [ - 1738162961, - "3373965312" - ], - [ - 1738162991, - "3374338048" - ], - [ - 1738163021, - "3376738304" - ], - [ - 1738163051, - "3317305344" - ], - [ - 1738163081, - "3317108736" - ], - [ - 1738163111, - "3332677632" - ], - [ - 1738163141, - "3334553600" - ], - [ - 1738163171, - "3367936000" - ], - [ - 1738163201, - "3381387264" - ], - [ - 1738163231, - "3380572160" - ], - [ - 1738163261, - "3376762880" - ], - [ - 1738163291, - "3379228672" - ], - [ - 1738163321, - "3302445056" - ], - [ - 1738163351, - "3302092800" - ], - [ - 1738163381, - "3320926208" - ], - [ - 1738163411, - "3321548800" - ], - [ - 1738163441, - "3311190016" - ], - [ - 1738163471, - "3440578560" - ], - [ - 1738163501, - "3441373184" - ], - [ - 1738163531, - "3442544640" - ], - [ - 1738163561, - "3447529472" - ], - [ - 1738163591, - "3392344064" - ], - [ - 1738163621, - "3393798144" - ], - [ - 1738163651, - "3399401472" - ], - [ - 1738163681, - "3401732096" - ], - [ - 1738163711, - "3298332672" - ], - [ - 1738163741, - "3311722496" - ], - [ - 1738163771, - "3358076928" - ], - [ - 1738163801, - "3360256000" - ] - ] - }, - { - "metric": { - "namespace": "local-path-storage" - }, - "values": [ 
- [ - 1738162001, - "81846272" - ], - [ - 1738162031, - "81846272" - ], - [ - 1738162061, - "81846272" - ], - [ - 1738162091, - "81846272" - ], - [ - 1738162121, - "81846272" - ], - [ - 1738162151, - "81846272" - ], - [ - 1738162181, - "81846272" - ], - [ - 1738162211, - "81846272" - ], - [ - 1738162241, - "81846272" - ], - [ - 1738162271, - "81846272" - ], - [ - 1738162301, - "81846272" - ], - [ - 1738162331, - "81846272" - ], - [ - 1738162361, - "81846272" - ], - [ - 1738162391, - "81846272" - ], - [ - 1738162421, - "81846272" - ], - [ - 1738162451, - "81846272" - ], - [ - 1738162481, - "81846272" - ], - [ - 1738162511, - "81846272" - ], - [ - 1738162541, - "81846272" - ], - [ - 1738162571, - "81846272" - ], - [ - 1738162601, - "81846272" - ], - [ - 1738162631, - "81846272" - ], - [ - 1738162661, - "81846272" - ], - [ - 1738162691, - "81846272" - ], - [ - 1738162721, - "81846272" - ], - [ - 1738162751, - "81846272" - ], - [ - 1738162781, - "81846272" - ], - [ - 1738162811, - "81846272" - ], - [ - 1738162841, - "81846272" - ], - [ - 1738162871, - "81846272" - ], - [ - 1738162901, - "81846272" - ], - [ - 1738162931, - "81846272" - ], - [ - 1738162961, - "81846272" - ], - [ - 1738162991, - "81846272" - ], - [ - 1738163021, - "81846272" - ], - [ - 1738163051, - "81846272" - ], - [ - 1738163081, - "81846272" - ], - [ - 1738163111, - "81846272" - ], - [ - 1738163141, - "81846272" - ], - [ - 1738163171, - "81850368" - ], - [ - 1738163201, - "81846272" - ], - [ - 1738163231, - "81846272" - ], - [ - 1738163261, - "81846272" - ], - [ - 1738163291, - "81846272" - ], - [ - 1738163321, - "82108416" - ], - [ - 1738163351, - "81846272" - ], - [ - 1738163381, - "81846272" - ], - [ - 1738163411, - "81846272" - ], - [ - 1738163441, - "81846272" - ], - [ - 1738163471, - "81846272" - ], - [ - 1738163501, - "81846272" - ], - [ - 1738163531, - "81846272" - ], - [ - 1738163561, - "81846272" - ], - [ - 1738163591, - "81846272" - ], - [ - 1738163621, - "81846272" - ], - [ - 1738163651, - 
"81846272" - ], - [ - 1738163681, - "81846272" - ], - [ - 1738163711, - "81846272" - ], - [ - 1738163741, - "81846272" - ], - [ - 1738163771, - "81846272" - ], - [ - 1738163801, - "81846272" - ] - ] - }, - { - "metric": { - "namespace": "sock-shop" - }, - "values": [ - [ - 1738162001, - "6530822144" - ], - [ - 1738162031, - "6530859008" - ], - [ - 1738162061, - "6530666496" - ], - [ - 1738162091, - "6530818048" - ], - [ - 1738162121, - "6531596288" - ], - [ - 1738162151, - "6531211264" - ], - [ - 1738162181, - "6530506752" - ], - [ - 1738162211, - "6531481600" - ], - [ - 1738162241, - "6531796992" - ], - [ - 1738162271, - "6531776512" - ], - [ - 1738162301, - "6532218880" - ], - [ - 1738162331, - "6532534272" - ], - [ - 1738162361, - "6531362816" - ], - [ - 1738162391, - "6531465216" - ], - [ - 1738162421, - "6531506176" - ], - [ - 1738162451, - "6531444736" - ], - [ - 1738162481, - "6531563520" - ], - [ - 1738162511, - "6529724416" - ], - [ - 1738162541, - "6529998848" - ], - [ - 1738162571, - "6530473984" - ], - [ - 1738162601, - "6530330624" - ], - [ - 1738162631, - "6530359296" - ], - [ - 1738162661, - "6531346432" - ], - [ - 1738162691, - "6531612672" - ], - [ - 1738162721, - "6531342336" - ], - [ - 1738162751, - "6531026944" - ], - [ - 1738162781, - "6531420160" - ], - [ - 1738162811, - "6531809280" - ], - [ - 1738162841, - "6532534272" - ], - [ - 1738162871, - "6533292032" - ], - [ - 1738162901, - "6532567040" - ], - [ - 1738162931, - "6533378048" - ], - [ - 1738162961, - "6533582848" - ], - [ - 1738162991, - "6534295552" - ], - [ - 1738163021, - "6533734400" - ], - [ - 1738163051, - "6534025216" - ], - [ - 1738163081, - "6534119424" - ], - [ - 1738163111, - "6534279168" - ], - [ - 1738163141, - "6535139328" - ], - [ - 1738163171, - "6534729728" - ], - [ - 1738163201, - "6534250496" - ], - [ - 1738163231, - "6536454144" - ], - [ - 1738163261, - "6535843840" - ], - [ - 1738163291, - "6536699904" - ], - [ - 1738163321, - "6536892416" - ], - [ - 1738163351, - 
"6537519104" - ], - [ - 1738163381, - "6537195520" - ], - [ - 1738163411, - "6537035776" - ], - [ - 1738163441, - "6536896512" - ], - [ - 1738163471, - "6537318400" - ], - [ - 1738163501, - "6537957376" - ], - [ - 1738163531, - "6538096640" - ], - [ - 1738163561, - "6538461184" - ], - [ - 1738163591, - "6537748480" - ], - [ - 1738163621, - "6538604544" - ], - [ - 1738163651, - "6540648448" - ], - [ - 1738163681, - "6539235328" - ], - [ - 1738163711, - "6539284480" - ], - [ - 1738163741, - "6539341824" - ], - [ - 1738163771, - "6540587008" - ], - [ - 1738163801, - "6540685312" - ] - ] - } - ] - }, "random_key": "AtEJ", "tool_name": "execute_prometheus_range_query", "start": "2025-01-29T14:46:41Z", diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt index 51337603..1510b470 100644 --- a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt @@ -1,796 +1,6 @@ {"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"container_memory_working_set_bytes{pod=\"robusta-holmes-7956c6bbc7-v5krl\",namespace=\"default\"}","description":"*","start":"*","end":"*","step":"*"}} { "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "__name__": "container_memory_working_set_bytes", - "container": "holmes", - "endpoint": "https-metrics", - "id": "/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-pod97941d0e_b5af_47a5_bfa4_201b8642d9ee.slice/cri-containerd-5f781194d78dc0b425f67ffee3922e71f9d89251ed6a33e9624b2bd8defe7cf5.scope", - "image": "us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes:nicolas_test", - "instance": 
"172.18.0.2:10250", - "job": "kubelet", - "metrics_path": "/metrics/cadvisor", - "name": "5f781194d78dc0b425f67ffee3922e71f9d89251ed6a33e9624b2bd8defe7cf5", - "namespace": "default", - "node": "grafana-cloud-control-plane", - "pod": "robusta-holmes-7956c6bbc7-v5krl", - "service": "robusta-kube-prometheus-st-kubelet" - }, - "values": [ - [ - 1739973438, - "330829824" - ], - [ - 1739973468, - "330825728" - ], - [ - 1739973498, - "330829824" - ], - [ - 1739973528, - "330829824" - ], - [ - 1739973558, - "330829824" - ], - [ - 1739973588, - "330829824" - ], - [ - 1739973618, - "330833920" - ], - [ - 1739973648, - "330833920" - ], - [ - 1739973678, - "330829824" - ], - [ - 1739973708, - "330829824" - ], - [ - 1739973738, - "330833920" - ], - [ - 1739973768, - "330833920" - ], - [ - 1739973798, - "330829824" - ], - [ - 1739973828, - "330838016" - ], - [ - 1739973858, - "332267520" - ], - [ - 1739973888, - "333791232" - ], - [ - 1739973918, - "333791232" - ], - [ - 1739973948, - "334630912" - ], - [ - 1739973978, - "337260544" - ], - [ - 1739974008, - "339595264" - ], - [ - 1739974038, - "339845120" - ], - [ - 1739974068, - "339853312" - ], - [ - 1739974098, - "339853312" - ], - [ - 1739974128, - "339845120" - ], - [ - 1739974158, - "339849216" - ], - [ - 1739974188, - "339849216" - ], - [ - 1739974218, - "339845120" - ], - [ - 1739974248, - "339845120" - ], - [ - 1739974278, - "339849216" - ], - [ - 1739974308, - "339845120" - ], - [ - 1739974338, - "339845120" - ], - [ - 1739974368, - "339845120" - ], - [ - 1739974398, - "339849216" - ], - [ - 1739974428, - "339849216" - ], - [ - 1739974458, - "339845120" - ], - [ - 1739974488, - "339857408" - ], - [ - 1739974518, - "339857408" - ], - [ - 1739974548, - "339857408" - ], - [ - 1739974578, - "339853312" - ], - [ - 1739974608, - "339861504" - ], - [ - 1739974638, - "339861504" - ], - [ - 1739974668, - "339853312" - ], - [ - 1739974698, - "339853312" - ], - [ - 1739974728, - "339853312" - ], - [ - 1739974758, - "339853312" - 
], - [ - 1739974788, - "339853312" - ], - [ - 1739974818, - "339861504" - ], - [ - 1739974848, - "339861504" - ], - [ - 1739974878, - "339853312" - ], - [ - 1739974908, - "339853312" - ], - [ - 1739974938, - "339857408" - ], - [ - 1739974968, - "339857408" - ], - [ - 1739974998, - "339853312" - ], - [ - 1739975028, - "339865600" - ], - [ - 1739975058, - "339865600" - ], - [ - 1739975088, - "339861504" - ], - [ - 1739975118, - "339861504" - ], - [ - 1739975148, - "339861504" - ], - [ - 1739975178, - "339865600" - ], - [ - 1739975208, - "339861504" - ], - [ - 1739975238, - "339861504" - ] - ] - }, - { - "metric": { - "__name__": "container_memory_working_set_bytes", - "endpoint": "https-metrics", - "id": "/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-pod97941d0e_b5af_47a5_bfa4_201b8642d9ee.slice", - "instance": "172.18.0.2:10250", - "job": "kubelet", - "metrics_path": "/metrics/cadvisor", - "namespace": "default", - "node": "grafana-cloud-control-plane", - "pod": "robusta-holmes-7956c6bbc7-v5krl", - "service": "robusta-kube-prometheus-st-kubelet" - }, - "values": [ - [ - 1739973438, - "331214848" - ], - [ - 1739973468, - "331210752" - ], - [ - 1739973498, - "331214848" - ], - [ - 1739973528, - "331214848" - ], - [ - 1739973558, - "331214848" - ], - [ - 1739973588, - "331214848" - ], - [ - 1739973618, - "331218944" - ], - [ - 1739973648, - "331218944" - ], - [ - 1739973678, - "331214848" - ], - [ - 1739973708, - "331214848" - ], - [ - 1739973738, - "331218944" - ], - [ - 1739973768, - "331218944" - ], - [ - 1739973798, - "331214848" - ], - [ - 1739973828, - "331223040" - ], - [ - 1739973858, - "332652544" - ], - [ - 1739973888, - "334176256" - ], - [ - 1739973918, - "334925824" - ], - [ - 1739973948, - "335015936" - ], - [ - 1739973978, - "339984384" - ], - [ - 1739974008, - "340000768" - ], - [ - 1739974038, - "340230144" - ], - [ - 1739974068, - "340238336" - ], - [ - 1739974098, - "340238336" - ], - [ - 
1739974128, - "340230144" - ], - [ - 1739974158, - "340234240" - ], - [ - 1739974188, - "340234240" - ], - [ - 1739974218, - "340230144" - ], - [ - 1739974248, - "340230144" - ], - [ - 1739974278, - "340234240" - ], - [ - 1739974308, - "340234240" - ], - [ - 1739974338, - "340230144" - ], - [ - 1739974368, - "340230144" - ], - [ - 1739974398, - "340234240" - ], - [ - 1739974428, - "340230144" - ], - [ - 1739974458, - "340230144" - ], - [ - 1739974488, - "340242432" - ], - [ - 1739974518, - "340242432" - ], - [ - 1739974548, - "340238336" - ], - [ - 1739974578, - "340238336" - ], - [ - 1739974608, - "340246528" - ], - [ - 1739974638, - "340246528" - ], - [ - 1739974668, - "340238336" - ], - [ - 1739974698, - "340238336" - ], - [ - 1739974728, - "340238336" - ], - [ - 1739974758, - "340238336" - ], - [ - 1739974788, - "340238336" - ], - [ - 1739974818, - "340246528" - ], - [ - 1739974848, - "340246528" - ], - [ - 1739974878, - "340238336" - ], - [ - 1739974908, - "340238336" - ], - [ - 1739974938, - "340242432" - ], - [ - 1739974968, - "340242432" - ], - [ - 1739974998, - "340238336" - ], - [ - 1739975028, - "340250624" - ], - [ - 1739975058, - "340250624" - ], - [ - 1739975088, - "340250624" - ], - [ - 1739975118, - "340246528" - ], - [ - 1739975148, - "340250624" - ], - [ - 1739975178, - "340250624" - ], - [ - 1739975208, - "340250624" - ], - [ - 1739975238, - "340246528" - ] - ] - }, - { - "metric": { - "__name__": "container_memory_working_set_bytes", - "endpoint": "https-metrics", - "id": "/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-pod97941d0e_b5af_47a5_bfa4_201b8642d9ee.slice/cri-containerd-b7185571ac94695057fb0fb47fb518454f7ebaf569ac79c025f5045916e2232e.scope", - "image": "registry.k8s.io/pause:3.10", - "instance": "172.18.0.2:10250", - "job": "kubelet", - "metrics_path": "/metrics/cadvisor", - "name": "b7185571ac94695057fb0fb47fb518454f7ebaf569ac79c025f5045916e2232e", - "namespace": "default", - "node": 
"grafana-cloud-control-plane", - "pod": "robusta-holmes-7956c6bbc7-v5krl", - "service": "robusta-kube-prometheus-st-kubelet" - }, - "values": [ - [ - 1739973438, - "212992" - ], - [ - 1739973468, - "212992" - ], - [ - 1739973498, - "212992" - ], - [ - 1739973528, - "212992" - ], - [ - 1739973558, - "212992" - ], - [ - 1739973588, - "212992" - ], - [ - 1739973618, - "212992" - ], - [ - 1739973648, - "212992" - ], - [ - 1739973678, - "212992" - ], - [ - 1739973708, - "212992" - ], - [ - 1739973738, - "212992" - ], - [ - 1739973768, - "212992" - ], - [ - 1739973798, - "212992" - ], - [ - 1739973828, - "212992" - ], - [ - 1739973858, - "212992" - ], - [ - 1739973888, - "212992" - ], - [ - 1739973918, - "212992" - ], - [ - 1739973948, - "212992" - ], - [ - 1739973978, - "212992" - ], - [ - 1739974008, - "212992" - ], - [ - 1739974038, - "212992" - ], - [ - 1739974068, - "212992" - ], - [ - 1739974098, - "212992" - ], - [ - 1739974128, - "212992" - ], - [ - 1739974158, - "212992" - ], - [ - 1739974188, - "212992" - ], - [ - 1739974218, - "212992" - ], - [ - 1739974248, - "212992" - ], - [ - 1739974278, - "212992" - ], - [ - 1739974308, - "212992" - ], - [ - 1739974338, - "212992" - ], - [ - 1739974368, - "212992" - ], - [ - 1739974398, - "212992" - ], - [ - 1739974428, - "212992" - ], - [ - 1739974458, - "212992" - ], - [ - 1739974488, - "212992" - ], - [ - 1739974518, - "212992" - ], - [ - 1739974548, - "212992" - ], - [ - 1739974578, - "212992" - ], - [ - 1739974608, - "212992" - ], - [ - 1739974638, - "212992" - ], - [ - 1739974668, - "212992" - ], - [ - 1739974698, - "212992" - ], - [ - 1739974728, - "212992" - ], - [ - 1739974758, - "212992" - ], - [ - 1739974788, - "212992" - ], - [ - 1739974818, - "212992" - ], - [ - 1739974848, - "212992" - ], - [ - 1739974878, - "212992" - ], - [ - 1739974908, - "212992" - ], - [ - 1739974938, - "212992" - ], - [ - 1739974968, - "212992" - ], - [ - 1739974998, - "212992" - ], - [ - 1739975028, - "212992" - ], - [ - 1739975058, - 
"212992" - ], - [ - 1739975088, - "212992" - ], - [ - 1739975118, - "212992" - ], - [ - 1739975148, - "212992" - ], - [ - 1739975178, - "212992" - ], - [ - 1739975208, - "212992" - ], - [ - 1739975238, - "212992" - ] - ] - } - ] - }, "random_key": "vwJA", "tool_name": "execute_prometheus_range_query", "description": "Memory usage for robusta-holmes pod", diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml index e8e5fae8..45d2c003 100644 --- a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml @@ -3,3 +3,4 @@ expected_output: - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "vwJA" } >>. Both tool_name and random_key MUST absolutely be present and the same as mentioned here.' evaluation: correctness: 1 +generate_mocks: True diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt index 1d03d9e6..1bbe554e 100644 --- a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt @@ -1,262 +1,6 @@ {"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"sum(rate(container_cpu_usage_seconds_total{pod=\"robusta-holmes-7956c6bbc7-v5krl\"}[5m])) by (pod)","description":"*","start":"*","end":"*","step":"*"}} { "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "pod": "robusta-holmes-7956c6bbc7-v5krl" - }, - "values": [ - [ - 1739974068, - "0.002144419836653223" - ], - [ - 
1739974098, - "0.002099330660706121" - ], - [ - 1739974128, - "0.0021036509153405693" - ], - [ - 1739974158, - "0.0021460544685770254" - ], - [ - 1739974188, - "0.0021387821662440226" - ], - [ - 1739974218, - "0.002004565361662111" - ], - [ - 1739974248, - "0.0020237542373384224" - ], - [ - 1739974278, - "0.002004929155904469" - ], - [ - 1739974308, - "0.001951633799832392" - ], - [ - 1739974338, - "0.0018451832562585316" - ], - [ - 1739974368, - "0.0018120988130422558" - ], - [ - 1739974398, - "0.0018715505096282671" - ], - [ - 1739974428, - "0.0018647436497913313" - ], - [ - 1739974458, - "0.0018961485265557838" - ], - [ - 1739974488, - "0.0020138379880019113" - ], - [ - 1739974518, - "0.002031105182266877" - ], - [ - 1739974548, - "0.0020361105227313095" - ], - [ - 1739974578, - "0.0020592061839086193" - ], - [ - 1739974608, - "0.0020878273632490294" - ], - [ - 1739974638, - "0.0021487721818257426" - ], - [ - 1739974668, - "0.002108847216571825" - ], - [ - 1739974698, - "0.0021441882083414717" - ], - [ - 1739974728, - "0.0021377254052434578" - ], - [ - 1739974758, - "0.0020931879628660645" - ], - [ - 1739974788, - "0.0020756032582432953" - ], - [ - 1739974818, - "0.002074645673087472" - ], - [ - 1739974848, - "0.002033403487826402" - ], - [ - 1739974878, - "0.0019511100667039417" - ], - [ - 1739974908, - "0.00197596000785474" - ], - [ - 1739974938, - "0.0020125591978856364" - ], - [ - 1739974968, - "0.0019215618792556713" - ], - [ - 1739974998, - "0.0019096397099409163" - ], - [ - 1739975028, - "0.001929468931314625" - ], - [ - 1739975058, - "0.0019307202542465682" - ], - [ - 1739975088, - "0.0019097419540450142" - ], - [ - 1739975118, - "0.001974109446740776" - ], - [ - 1739975148, - "0.002029847480417659" - ], - [ - 1739975178, - "0.002065951835505558" - ], - [ - 1739975208, - "0.002063288358376739" - ], - [ - 1739975238, - "0.0021519573827565273" - ], - [ - 1739975268, - "0.0022900053599633225" - ], - [ - 1739975298, - "0.002249468266972769" - ], - [ - 
1739975328, - "0.002384692159377845" - ], - [ - 1739975358, - "0.0024260717354572586" - ], - [ - 1739975388, - "0.0025115599194239623" - ], - [ - 1739975418, - "0.002497366767008242" - ], - [ - 1739975448, - "0.002487755825493165" - ], - [ - 1739975478, - "0.0025284455334128406" - ], - [ - 1739975508, - "0.0025086424254349496" - ], - [ - 1739975538, - "0.0024484717650784748" - ], - [ - 1739975568, - "0.0023836217846226063" - ], - [ - 1739975598, - "0.0025764565105480873" - ], - [ - 1739975628, - "0.0026345982933064973" - ], - [ - 1739975658, - "0.0026226150739384964" - ], - [ - 1739975688, - "0.002701519318107138" - ], - [ - 1739975718, - "0.002782986320627817" - ], - [ - 1739975748, - "0.0027171950968434386" - ], - [ - 1739975778, - "0.0027186090281877775" - ], - [ - 1739975808, - "0.00273863781620118" - ], - [ - 1739975838, - "0.0027098064769902393" - ], - [ - 1739975868, - "0.002603972369516789" - ] - ] - } - ] - }, "random_key": "HBGf", "tool_name": "execute_prometheus_range_query", "description": "CPU usage for robusta-holmes pod", diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt index 9d0fc3b9..9fb7bcbc 100644 --- a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt @@ -1,262 +1,6 @@ {"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"sum(rate(container_cpu_usage_seconds_total{pod=\"robusta-holmes-7956c6bbc7-v5krl\", namespace=\"default\"}[5m])) by (pod)","description":"*","start":"*","end":"*","step":"*"}} { "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "pod": 
"robusta-holmes-7956c6bbc7-v5krl" - }, - "values": [ - [ - 1739974068, - "0.002144419836653223" - ], - [ - 1739974098, - "0.002099330660706121" - ], - [ - 1739974128, - "0.0021036509153405693" - ], - [ - 1739974158, - "0.0021460544685770254" - ], - [ - 1739974188, - "0.0021387821662440226" - ], - [ - 1739974218, - "0.002004565361662111" - ], - [ - 1739974248, - "0.0020237542373384224" - ], - [ - 1739974278, - "0.002004929155904469" - ], - [ - 1739974308, - "0.001951633799832392" - ], - [ - 1739974338, - "0.0018451832562585316" - ], - [ - 1739974368, - "0.0018120988130422558" - ], - [ - 1739974398, - "0.0018715505096282671" - ], - [ - 1739974428, - "0.0018647436497913313" - ], - [ - 1739974458, - "0.0018961485265557838" - ], - [ - 1739974488, - "0.0020138379880019113" - ], - [ - 1739974518, - "0.002031105182266877" - ], - [ - 1739974548, - "0.0020361105227313095" - ], - [ - 1739974578, - "0.0020592061839086193" - ], - [ - 1739974608, - "0.0020878273632490294" - ], - [ - 1739974638, - "0.0021487721818257426" - ], - [ - 1739974668, - "0.002108847216571825" - ], - [ - 1739974698, - "0.0021441882083414717" - ], - [ - 1739974728, - "0.0021377254052434578" - ], - [ - 1739974758, - "0.0020931879628660645" - ], - [ - 1739974788, - "0.0020756032582432953" - ], - [ - 1739974818, - "0.002074645673087472" - ], - [ - 1739974848, - "0.002033403487826402" - ], - [ - 1739974878, - "0.0019511100667039417" - ], - [ - 1739974908, - "0.00197596000785474" - ], - [ - 1739974938, - "0.0020125591978856364" - ], - [ - 1739974968, - "0.0019215618792556713" - ], - [ - 1739974998, - "0.0019096397099409163" - ], - [ - 1739975028, - "0.001929468931314625" - ], - [ - 1739975058, - "0.0019307202542465682" - ], - [ - 1739975088, - "0.0019097419540450142" - ], - [ - 1739975118, - "0.001974109446740776" - ], - [ - 1739975148, - "0.002029847480417659" - ], - [ - 1739975178, - "0.002065951835505558" - ], - [ - 1739975208, - "0.002063288358376739" - ], - [ - 1739975238, - "0.0021519573827565273" - ], - 
[ - 1739975268, - "0.0022900053599633225" - ], - [ - 1739975298, - "0.002249468266972769" - ], - [ - 1739975328, - "0.002384692159377845" - ], - [ - 1739975358, - "0.0024260717354572586" - ], - [ - 1739975388, - "0.0025115599194239623" - ], - [ - 1739975418, - "0.002497366767008242" - ], - [ - 1739975448, - "0.002487755825493165" - ], - [ - 1739975478, - "0.0025284455334128406" - ], - [ - 1739975508, - "0.0025086424254349496" - ], - [ - 1739975538, - "0.0024484717650784748" - ], - [ - 1739975568, - "0.0023836217846226063" - ], - [ - 1739975598, - "0.0025764565105480873" - ], - [ - 1739975628, - "0.0026345982933064973" - ], - [ - 1739975658, - "0.0026226150739384964" - ], - [ - 1739975688, - "0.002701519318107138" - ], - [ - 1739975718, - "0.002782986320627817" - ], - [ - 1739975748, - "0.0027171950968434386" - ], - [ - 1739975778, - "0.0027186090281877775" - ], - [ - 1739975808, - "0.00273863781620118" - ], - [ - 1739975838, - "0.0027098064769902393" - ], - [ - 1739975868, - "0.002603972369516789" - ] - ] - } - ] - }, "random_key": "Rs0H", "tool_name": "execute_prometheus_range_query", "description": "CPU usage for robusta-holmes pod", diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query.txt deleted file mode 100644 index edd3eab9..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query.txt +++ /dev/null @@ -1,125 +0,0 @@ -{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\"})","description":"*","start":"*","end":"*","step":"*"}} -{ - "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": {}, - "values": [ - [ - 1739535484, - "20721664" - ], - [ - 1739535784, - 
"114737152" - ], - [ - 1739536084, - "283242496" - ], - [ - 1739536384, - "470319104" - ], - [ - 1739536684, - "658276352" - ], - [ - 1739536984, - "852058112" - ], - [ - 1739537284, - "1040011264" - ], - [ - 1739537584, - "1225826304" - ], - [ - 1739537884, - "1417961472" - ], - [ - 1739538184, - "1603776512" - ], - [ - 1739538484, - "1796292608" - ], - [ - 1739538784, - "1984634880" - ], - [ - 1739539084, - "2079096832" - ], - [ - 1739539384, - "162766848" - ], - [ - 1739539684, - "352763904" - ], - [ - 1739539984, - "539447296" - ], - [ - 1739540284, - "731578368" - ], - [ - 1739540584, - "916525056" - ], - [ - 1739540884, - "1104474112" - ], - [ - 1739541184, - "1299525632" - ], - [ - 1739541484, - "1486602240" - ], - [ - 1739541784, - "1677078528" - ], - [ - 1739542084, - "1866686464" - ], - [ - 1739542384, - "2055421952" - ], - [ - 1739542684, - "2091687936" - ], - [ - 1739542984, - "230117376" - ] - ] - } - ] - }, - "random_key": "of68", - "tool_name": "execute_prometheus_range_query", - "description": "Memory usage for analytics-exporter-slow pod", - "query": "max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\"})", - "start": "1739456584", - "end": "1739542984", - "step": 300 -} diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_by_pod.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_by_pod.txt deleted file mode 100644 index 8089127f..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_by_pod.txt +++ /dev/null @@ -1,127 +0,0 @@ -{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\"}) by (pod)","description":"*","start":"*","end":"*","step":"*"}} -{ - "status": "success", - "data": { - 
"resultType": "matrix", - "result": [ - { - "metric": { - "pod": "analytics-exporter-slow-684486cfb7-2b6lf" - }, - "values": [ - [ - 1739535484, - "20721664" - ], - [ - 1739535784, - "114737152" - ], - [ - 1739536084, - "283242496" - ], - [ - 1739536384, - "470319104" - ], - [ - 1739536684, - "658276352" - ], - [ - 1739536984, - "852058112" - ], - [ - 1739537284, - "1040011264" - ], - [ - 1739537584, - "1225826304" - ], - [ - 1739537884, - "1417961472" - ], - [ - 1739538184, - "1603776512" - ], - [ - 1739538484, - "1796292608" - ], - [ - 1739538784, - "1984634880" - ], - [ - 1739539084, - "2079096832" - ], - [ - 1739539384, - "162766848" - ], - [ - 1739539684, - "352763904" - ], - [ - 1739539984, - "539447296" - ], - [ - 1739540284, - "731578368" - ], - [ - 1739540584, - "916525056" - ], - [ - 1739540884, - "1104474112" - ], - [ - 1739541184, - "1299525632" - ], - [ - 1739541484, - "1486602240" - ], - [ - 1739541784, - "1677078528" - ], - [ - 1739542084, - "1866686464" - ], - [ - 1739542384, - "2055421952" - ], - [ - 1739542684, - "2091687936" - ], - [ - 1739542984, - "230117376" - ] - ] - } - ] - }, - "random_key": "W3w4", - "tool_name": "execute_prometheus_range_query", - "description": "Memory usage for analytics-exporter-slow pod over time", - "query": "max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\"}) by (pod)", - "start": "1739456584", - "end": "1739542984", - "step": 300 -} diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace.txt deleted file mode 100644 index bd6debc6..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace.txt +++ /dev/null @@ -1,125 +0,0 @@ 
-{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\",namespace=\"default\"})","description":"*","start":"*","end":"*","step":"*"}} -{ - "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": {}, - "values": [ - [ - 1739535484, - "20721664" - ], - [ - 1739535784, - "114737152" - ], - [ - 1739536084, - "283242496" - ], - [ - 1739536384, - "470319104" - ], - [ - 1739536684, - "658276352" - ], - [ - 1739536984, - "852058112" - ], - [ - 1739537284, - "1040011264" - ], - [ - 1739537584, - "1225826304" - ], - [ - 1739537884, - "1417961472" - ], - [ - 1739538184, - "1603776512" - ], - [ - 1739538484, - "1796292608" - ], - [ - 1739538784, - "1984634880" - ], - [ - 1739539084, - "2079096832" - ], - [ - 1739539384, - "162766848" - ], - [ - 1739539684, - "352763904" - ], - [ - 1739539984, - "539447296" - ], - [ - 1739540284, - "731578368" - ], - [ - 1739540584, - "916525056" - ], - [ - 1739540884, - "1104474112" - ], - [ - 1739541184, - "1299525632" - ], - [ - 1739541484, - "1486602240" - ], - [ - 1739541784, - "1677078528" - ], - [ - 1739542084, - "1866686464" - ], - [ - 1739542384, - "2055421952" - ], - [ - 1739542684, - "2091687936" - ], - [ - 1739542984, - "230117376" - ] - ] - } - ] - }, - "random_key": "envg", - "tool_name": "execute_prometheus_range_query", - "description": "Memory usage for analytics-exporter-slow pod", - "query": "max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\",namespace=\"default\"})", - "start": "1739456584", - "end": "1739542984", - "step": 300 -} diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace_by_pod.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace_by_pod.txt deleted file mode 
100644 index 736767f8..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/execute_prometheus_range_query_with_namespace_by_pod.txt +++ /dev/null @@ -1,267 +0,0 @@ -{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\",namespace=\"default\"}) by (pod)","description":"*","start":"*","end":"*","step":"*"}} -{ - "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "pod": "analytics-exporter-slow-684486cfb7-2b6lf" - }, - "values": [ - [ - 1739539384, - "162766848" - ], - [ - 1739539444, - "200687616" - ], - [ - 1739539504, - "236470272" - ], - [ - 1739539564, - "272736256" - ], - [ - 1739539624, - "313577472" - ], - [ - 1739539684, - "352763904" - ], - [ - 1739539744, - "390684672" - ], - [ - 1739539804, - "424812544" - ], - [ - 1739539864, - "466132992" - ], - [ - 1739539924, - "501526528" - ], - [ - 1739539984, - "539447296" - ], - [ - 1739540044, - "579895296" - ], - [ - 1739540104, - "618254336" - ], - [ - 1739540164, - "653602816" - ], - [ - 1739540224, - "688996352" - ], - [ - 1739540284, - "731578368" - ], - [ - 1739540344, - "768630784" - ], - [ - 1739540404, - "802762752" - ], - [ - 1739540464, - "844083200" - ], - [ - 1739540524, - "882003968" - ], - [ - 1739540584, - "916525056" - ], - [ - 1739540644, - "959504384" - ], - [ - 1739540704, - "995766272" - ], - [ - 1739540764, - "1031159808" - ], - [ - 1739540824, - "1070784512" - ], - [ - 1739540884, - "1104474112" - ], - [ - 1739540944, - "1147842560" - ], - [ - 1739541004, - "1182842880" - ], - [ - 1739541064, - "1222422528" - ], - [ - 1739541124, - "1259077632" - ], - [ - 1739541184, - "1299525632" - ], - [ - 1739541244, - "1335791616" - ], - [ - 1739541304, - "1371578368" - ], - [ - 1739541364, - "1410367488" - ], - [ - 1739541424, - "1452081152" - ], - [ - 1739541484, - "1486602240" - ], - [ - 
1739541544, - "1525788672" - ], - [ - 1739541604, - "1564975104" - ], - [ - 1739541664, - "1600368640" - ], - [ - 1739541724, - "1637023744" - ], - [ - 1739541784, - "1677078528" - ], - [ - 1739541844, - "1715392512" - ], - [ - 1739541904, - "1753313280" - ], - [ - 1739541964, - "1789972480" - ], - [ - 1739542024, - "1826234368" - ], - [ - 1739542084, - "1866686464" - ], - [ - 1739542144, - "1903738880" - ], - [ - 1739542204, - "1939132416" - ], - [ - 1739542264, - "1974525952" - ], - [ - 1739542324, - "2014973952" - ], - [ - 1739542384, - "2055421952" - ], - [ - 1739542444, - "2092081152" - ], - [ - 1739542504, - "2091687936" - ], - [ - 1739542564, - "2091687936" - ], - [ - 1739542624, - "2091687936" - ], - [ - 1739542684, - "2091687936" - ], - [ - 1739542744, - "105877504" - ], - [ - 1739542804, - "127098880" - ], - [ - 1739542864, - "155176960" - ], - [ - 1739542924, - "194359296" - ], - [ - 1739542984, - "230117376" - ] - ] - } - ] - }, - "random_key": "UEMD", - "tool_name": "execute_prometheus_range_query", - "description": "Memory usage for analytics-exporter-slow deployment", - "query": "max(container_memory_working_set_bytes{pod=\"analytics-exporter-slow-684486cfb7-2b6lf\", namespace=\"default\"}) by (pod)", - "start": "2025-02-14T13:23:04Z", - "end": "2025-02-14T14:23:04Z", - "step": 60 -} diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/get_current_time.txt deleted file mode 100644 index d9ab289d..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/get_current_time.txt +++ /dev/null @@ -1,2 +0,0 @@ -{"toolset_name":"datetime","tool_name":"get_current_time","match_params":{}} -The current UTC date and time are 2025-02-14 14:23:04.516591+00:00. The current UTC timestamp in seconds is 1739542984. 
diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_find_resource.txt deleted file mode 100644 index 0bddc820..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_find_resource.txt +++ /dev/null @@ -1,5 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"deployment","keyword":"analytics-exporter-slow"}} -stdout: -default analytics-exporter-slow 1/1 1 1 4d6h analytics-exporter-slow us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0 app=analytics-exporter-slow - -stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_get_by_name.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_get_by_name.txt deleted file mode 100644 index df2d6c43..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_get_by_name.txt +++ /dev/null @@ -1,6 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_get_by_name","match_params":{"kind":"deployment","name":"analytics-exporter-slow","namespace":"default"}} -stdout: -NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR LABELS -analytics-exporter-slow 1/1 1 1 4d6h analytics-exporter-slow us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0 app=analytics-exporter-slow - -stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_lineage_children.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_lineage_children.txt deleted file mode 100644 index 7c791057..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_lineage_children.txt +++ /dev/null @@ -1,23 +0,0 @@ 
-{"toolset_name":"kubernetes/kube-lineage-extras","tool_name":"kubectl_lineage_children","match_params":{"kind":"deployment","name":"analytics-exporter-slow","namespace":"default"}} -stdout: -NAME READY STATUS AGE -Deployment/analytics-exporter-slow 1/1 4d6h -├── ReplicaSet/analytics-exporter-slow-55644bc6b5 0/0 3h1m -├── ReplicaSet/analytics-exporter-slow-55b58fff76 0/0 3h10m -├── ReplicaSet/analytics-exporter-slow-65c98f9d5c 0/0 4d5h -├── ReplicaSet/analytics-exporter-slow-66dc8844dc 0/0 4d6h -├── ReplicaSet/analytics-exporter-slow-684486cfb7 1/1 125m -│ └── Pod/analytics-exporter-slow-684486cfb7-2b6lf 1/1 Running 125m -│ ├── Event/analytics-exporter-slow-684486cfb7-2b6lf.18241249922da068 - Pulling: Pulling image "us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0" (x3) 125m -│ ├── Event/analytics-exporter-slow-684486cfb7-2b6lf.18241249ca3e91dc - Created: Created container analytics-exporter-slow (x3) 125m -│ ├── Event/analytics-exporter-slow-684486cfb7-2b6lf.18241249cf9d4695 - Started: Started container analytics-exporter-slow (x3) 125m -│ ├── Event/analytics-exporter-slow-684486cfb7-2b6lf.182418a2fa33e13f - Pulled: Successfully pulled image "us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0" in 1.076s (1.076s including waiting). Image size: 47485901 bytes. 
8m59s -│ └── Service/kubernetes - 66d -├── ReplicaSet/analytics-exporter-slow-6f4c8b64cf 0/0 142m -├── ReplicaSet/analytics-exporter-slow-767fd64696 0/0 3h21m -├── ReplicaSet/analytics-exporter-slow-7b74f69c65 0/0 130m -├── ReplicaSet/analytics-exporter-slow-7b98fb7847 0/0 4d5h -├── ReplicaSet/analytics-exporter-slow-7d596d8689 0/0 4d6h -└── ReplicaSet/analytics-exporter-slow-9ccc96975 0/0 3h3m - -stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_top_pods.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_top_pods.txt deleted file mode 100644 index a29a1535..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/kubectl_top_pods.txt +++ /dev/null @@ -1,6 +0,0 @@ -{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_top_pods","match_params":{}} -Command `kubectl top pods -A` failed with return code 1 -stdout: - -stderr: -error: Metrics API not available diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/list_available_metrics.txt deleted file mode 100644 index 6b2b8463..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/list_available_metrics.txt +++ /dev/null @@ -1,4 +0,0 @@ -{"toolset_name":"prometheus/metrics","tool_name":"list_available_metrics","match_params":{"name_filter":"container_memory_working_set_bytes"}} -Metric | Description | Type | Labels ----------------------------------------------------------------------------------------------------- -container_memory_working_set_bytes | Current working set in bytes. 
| gauge | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/slow_oom_deployment.yaml b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/slow_oom_deployment.yaml deleted file mode 100644 index 1e21c191..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/slow_oom_deployment.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: analytics-exporter-slow -spec: - replicas: 1 - selector: - matchLabels: - app: analytics-exporter-slow - template: - metadata: - labels: - app: analytics-exporter-slow - spec: - containers: - - name: analytics-exporter-slow - image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0 - imagePullPolicy: Always - args: - - 10Mi - - "0" - - 3000Mi - - "5000" - - "2" - resources: - limits: - memory: 2000Mi - requests: - memory: 2000Mi - restartPolicy: Always - nodeSelector: - kubernetes.io/arch: amd64 diff --git a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/test_case.yaml deleted file mode 100644 index 52a76359..00000000 --- a/tests/llm/fixtures/test_ask_holmes/32_oom_kill_graph_memory_consumption/test_case.yaml +++ /dev/null @@ -1,13 +0,0 @@ -user_prompt: "Show me the memory usage over time for the `analytics-exporter-slow` deployment. Does it indicate a memory leak?" -expected_output: - - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "XXX" } >>.' - - "Yes, the memory profile indicates a memory leak" -# These are slow going. It takes an hour to get one pod killed. This is to make sure the mem leak is slow enough to look like a mem leak for the LLM to see it as such. 
-# Running with RUN_LIVE will potentially fail the test which is expected. Run the deployment for a couple of hours before running this test live. -before_test: | - kubectl apply -f ./slow_oom_deployment.yaml - sleep 300 -after_test: | - kubectl delete -f ./slow_oom_deployment.yaml -evaluation: - correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/execute_prometheus_range_query.txt deleted file mode 100644 index 68ba224d..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/execute_prometheus_range_query.txt +++ /dev/null @@ -1,231 +0,0 @@ -{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"sum(rate(container_cpu_cfs_throttled_periods_total{pod=\"login-app-58995d8584-pbv8p\"}[5m])) by (pod)","description":"*","start":"1739537300","end":"1739540900","step":60}} -{ - "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "pod": "login-app-58995d8584-pbv8p" - }, - "values": [ - [ - 1739537840, - "3.4072032186026733" - ], - [ - 1739537900, - "5.406597523823652" - ], - [ - 1739537960, - "7.408753290644883" - ], - [ - 1739538020, - "9.404728838224925" - ], - [ - 1739538080, - "9.995487112162047" - ], - [ - 1739538140, - "8.63625" - ], - [ - 1739538200, - "9.995438074644968" - ], - [ - 1739538260, - "10.001557528424893" - ], - [ - 1739538320, - "10.00137849975695" - ], - [ - 1739538380, - "9.999286569865461" - ], - [ - 1739538440, - "9.99430339128491" - ], - [ - 1739538500, - "9.99486313935569" - ], - [ - 1739538560, - "9.996669024548167" - ], - [ - 1739538620, - "9.994355347478168" - ], - [ - 1739538680, - "9.99736217356898" - ], - [ - 1739538740, - "9.999829470848043" - ], - [ - 1739538800, - "10.001075783374326" - ], - [ - 1739538860, - "9.36550466405067" - ], - [ - 1739538920, - "9.348420425061777" - ], - [ - 1739538980, - 
"10.001689358078405" - ], - [ - 1739539040, - "9.260918713816103" - ], - [ - 1739539100, - "9.984031632575467" - ], - [ - 1739539160, - "9.98251670543612" - ], - [ - 1739539220, - "9.982983972239353" - ], - [ - 1739539280, - "9.251973814906151" - ], - [ - 1739539340, - "10.002337569665729" - ], - [ - 1739539400, - "9.279420970701942" - ], - [ - 1739539460, - "9.373990551064944" - ], - [ - 1739539520, - "10.000871852250107" - ], - [ - 1739539580, - "9.998340371388002" - ], - [ - 1739539640, - "10.00240247150026" - ], - [ - 1739539700, - "9.998464655240205" - ], - [ - 1739539760, - "10.002335762023522" - ], - [ - 1739539820, - "9.999705891003204" - ], - [ - 1739539880, - "9.993962889957304" - ], - [ - 1739539940, - "9.992202616455367" - ], - [ - 1739540000, - "9.986677436612922" - ], - [ - 1739540060, - "9.990708709470603" - ], - [ - 1739540120, - "9.990659139828615" - ], - [ - 1739540180, - "9.999596494638883" - ], - [ - 1739540240, - "9.998636884433589" - ], - [ - 1739540300, - "9.989851698991583" - ], - [ - 1739540360, - "9.99259125473183" - ], - [ - 1739540420, - "9.387131423731198" - ], - [ - 1739540480, - "9.993471254520895" - ], - [ - 1739540540, - "9.999655843584732" - ], - [ - 1739540600, - "9.997547981542988" - ], - [ - 1739540660, - "9.349657281442829" - ], - [ - 1739540720, - "9.997990544183383" - ], - [ - 1739540780, - "10.00300465721869" - ], - [ - 1739540840, - "10.001487763148106" - ], - [ - 1739540900, - "9.28204958504213" - ] - ] - } - ] - }, - "random_key": "KmFa", - "tool_name": "execute_prometheus_range_query", - "description": "CPU throttling for login-app pod", - "query": "sum(rate(container_cpu_cfs_throttled_periods_total{pod=\"login-app-58995d8584-pbv8p\"}[5m])) by (pod)", - "start": "1739537300", - "end": "1739540900", - "step": 60 -} diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/get_current_time.txt deleted file mode 100644 index 
1b1fcfe5..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/get_current_time.txt +++ /dev/null @@ -1,2 +0,0 @@ -{"toolset_name":"datetime","tool_name":"get_current_time","match_params":{}} -The current UTC date and time are 2025-02-14 13:48:20.037094+00:00. The current UTC timestamp in seconds is 1739540900. diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/Dockerfile b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/Dockerfile deleted file mode 100644 index f932d018..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM python:3.10-slim - -# Set working directory -WORKDIR /app - -# Copy requirements.txt -COPY requirements.txt . - -# Install dependencies -RUN pip install --no-cache-dir -r requirements.txt - -# Copy the FastAPI app -COPY . . - -# Expose the ports -EXPOSE 8000 8001 - -# Run the FastAPI app -CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/app.py b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/app.py deleted file mode 100644 index a66da5bd..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/app.py +++ /dev/null @@ -1,54 +0,0 @@ -# ruff: noqa: F821 -import logging -import time -from fastapi import FastAPI -from fastapi.responses import JSONResponse -from prometheus_fastapi_instrumentator import Instrumentator -import bcrypt -import json - -app = FastAPI() - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -Instrumentator().instrument(app).expose(app) - - -def verify_password(): - logger.info( - "Connecting to promotions database to see if we should try to upsell user" - ) - try: - start_time = time.time() - logger.info("Verify password") - - password = b"test_password" - salt = bcrypt.gensalt(rounds=15) - bcrypt.hashpw(password, salt) - - 
end_time = time.time() - logger.info( - f"Password verification completed in {end_time - start_time:.2f} seconds." - ) - - return True - except Exception as e: - logger.error(f"Error checking for password: {e}") - return False - - -@app.get("/", response_class=JSONResponse) -def read_root(): - logger.info("Received request for checkout page.") - start_time = time.time() - is_valid = verify_password() - end_time = time.time() - logger.info(f"Page rendered in {end_time - start_time:.2f} seconds.") - return json.dumps({"valid": is_valid}) - - -if __name__ == "__main__": - # Start Prometheus metrics server - start_http_server(8001) - uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/build.sh b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/build.sh deleted file mode 100755 index b1349366..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/build.sh +++ /dev/null @@ -1 +0,0 @@ -docker buildx build --platform linux/amd64 . 
-t us-central1-docker.pkg.dev/genuine-flight-317411/devel/cpu-throttling-demo diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/manifest.yaml b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/manifest.yaml deleted file mode 100644 index 7ad2a2c4..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/manifest.yaml +++ /dev/null @@ -1,100 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: login-app -spec: - replicas: 1 - selector: - matchLabels: - app: login-app - template: - metadata: - labels: - app: login-app - spec: - containers: - - name: login-app - image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/cpu-throttling-demo - ports: - - containerPort: 8000 - - containerPort: 8001 - resources: - requests: - cpu: "400m" - limits: - cpu: "800m" - - name: curl-sidecar-1 - image: curlimages/curl - args: - - /bin/sh - - -c - - while true; do curl -s http://localhost:8000; sleep 1; done - - name: curl-sidecar-2 - image: curlimages/curl - args: - - /bin/sh - - -c - - while true; do curl -s http://localhost:8000; sleep 1; done ---- -apiVersion: v1 -kind: Service -metadata: - name: login-app-service - labels: - app: login-app -spec: - selector: - app: login-app - ports: - - protocol: TCP - port: 80 - targetPort: 8000 - name: http - type: ClusterIP ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: login-app-service-monitor - labels: - release: robusta -spec: - selector: - matchLabels: - app: login-app - endpoints: - - port: http - path: /metrics - interval: 5s - namespaceSelector: - matchNames: - - default ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: login-app-alert-rules - labels: - release: robusta -spec: - groups: - - name: loginapp.rules - rules: - - alert: LoginAppCPUThrottling - expr: | - (rate(container_cpu_cfs_throttled_seconds_total{container="login-app"}[5m]) > 0) - for: 1m - labels: - severity: warning 
- annotations: - summary: "Login App CPU Throttling Detected" - description: "Container {{ $labels.container }} in pod {{ $labels.pod }} has been CPU throttled for the last 5 minutes. This might impact application performance." - - alert: LoginAppHighCPUThrottling - expr: | - (rate(container_cpu_cfs_throttled_seconds_total{container="login-app"}[5m]) > 0.1) - for: 5m - labels: - severity: critical - annotations: - summary: "Login App Severe CPU Throttling" - description: "Container {{ $labels.container }} in pod {{ $labels.pod }} is experiencing severe CPU throttling (>10% of CPU time). Immediate attention required." diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/requirements.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/requirements.txt deleted file mode 100644 index 1e7a4b90..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/helm/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -bcrypt -fastapi -uvicorn[standard] -pymysql -prometheus-fastapi-instrumentator diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_find_resource.txt deleted file mode 100644 index 33bd5eb7..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_find_resource.txt +++ /dev/null @@ -1,5 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"login-app"}} -stdout: -default login-app-58995d8584-pbv8p 3/3 Running 0 52m 10.244.0.105 grafana-cloud-control-plane app=login-app,pod-template-hash=58995d8584 - -stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_top_pods.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_top_pods.txt deleted file mode 100644 index a29a1535..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/kubectl_top_pods.txt 
+++ /dev/null @@ -1,6 +0,0 @@ -{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_top_pods","match_params":{}} -Command `kubectl top pods -A` failed with return code 1 -stdout: - -stderr: -error: Metrics API not available diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/list_available_metrics.txt deleted file mode 100644 index 3deb9867..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/list_available_metrics.txt +++ /dev/null @@ -1,4 +0,0 @@ -{"toolset_name":"prometheus/metrics","tool_name":"list_available_metrics","match_params":{"name_filter":"container_cpu_cfs_throttled_periods_total"}} -Metric | Description | Type | Labels ----------------------------------------------------------------------------------------------------- -container_cpu_cfs_throttled_periods_total | Number of throttled period intervals. | counter | container, endpoint, id, image, instance, job, metrics_path, name, namespace, node, pod, service diff --git a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/test_case.yaml deleted file mode 100644 index 78faf42e..00000000 --- a/tests/llm/fixtures/test_ask_holmes/33_cpu_throttling_graph/test_case.yaml +++ /dev/null @@ -1,7 +0,0 @@ -user_prompt: "Is the login-app being CPU throttled? Show me." -expected_output: - - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "XXX" } >>.' 
-before_test: kubectl apply -f ./helm/manifest.yaml -after_test: kubectl delete -f ./helm/manifest.yaml -evaluation: - correctness: 1 diff --git a/tests/test_prometheus_integration.py b/tests/test_prometheus_integration.py index c52f671f..6739b2d8 100644 --- a/tests/test_prometheus_integration.py +++ b/tests/test_prometheus_integration.py @@ -1,3 +1,4 @@ +import datetime import json import os @@ -25,19 +26,101 @@ def tool_executor(): return tool_executor -def test_list_available_metrics(tool_executor: ToolExecutor): +def test_list_available_metrics_exact_match(tool_executor: ToolExecutor): tool = tool_executor.get_tool_by_name("list_available_metrics") assert tool actual_output = tool.invoke({"name_filter": "kubelet_running_pods"}) print(actual_output) assert "kubelet_running_pods" in actual_output + assert ( + "Number of pods that have a running pod sandbox" in actual_output + ) # description + assert "gauge" in actual_output # type + assert "node" in actual_output # label + assert "namespace" in actual_output # label -def test_execute_prometheus_query(tool_executor: ToolExecutor): +def test_list_available_metrics_partial_match(tool_executor: ToolExecutor): + tool = tool_executor.get_tool_by_name("list_available_metrics") + assert tool + actual_output = tool.invoke({"name_filter": "http"}) + print(actual_output) + assert ( + "http_requests_total | Total number of requests by method, status and handler. 
| counter" + in actual_output + ) + assert ( + "kubelet_http_requests_total | [ALPHA] Number of the http requests received since the server started | counter" + in actual_output + ) + + # Ensure there is some common labels present in the result + assert "endpoint" in actual_output + assert "container" in actual_output + assert "namespace" in actual_output + assert "job" in actual_output + assert "node" in actual_output + assert "pod" in actual_output + assert "service" in actual_output + + +def test_execute_prometheus_instant_query(tool_executor: ToolExecutor): + tool = tool_executor.get_tool_by_name("execute_prometheus_instant_query") + assert tool + actual_output = tool.invoke({"query": "up"}) + print(actual_output) + assert actual_output + parsed_output = json.loads(actual_output) + assert parsed_output.get("status") == "success" + + +def test_execute_prometheus_instant_query_no_result(tool_executor: ToolExecutor): + tool = tool_executor.get_tool_by_name("execute_prometheus_instant_query") + assert tool + actual_output = tool.invoke({"query": "this_metric_does_not_exist"}) + print(actual_output) + assert actual_output + parsed_output = json.loads(actual_output) + assert parsed_output.get("status") == "Failed" + assert ( + parsed_output.get("error_message") + == "The prometheus query returned no result. Is the query correct?" + ) + + +def test_execute_prometheus_range_query_no_result(tool_executor: ToolExecutor): + tool = tool_executor.get_tool_by_name("execute_prometheus_range_query") + assert tool + twenty_minutes = 20 * 60 + now = datetime.datetime.now(datetime.timezone.utc).timestamp() + actual_output = tool.invoke( + { + "query": "this_metric_does_not_exist", + "start": now - twenty_minutes, + "end": now, + "step": 1, + } + ) + print(actual_output) + assert actual_output + parsed_output = json.loads(actual_output) + assert parsed_output.get("status") == "Failed" + assert ( + parsed_output.get("error_message") + == "The prometheus query returned no result. 
Is the query correct?" + ) + + +def test_execute_prometheus_range_query(tool_executor: ToolExecutor): tool = tool_executor.get_tool_by_name("execute_prometheus_instant_query") assert tool - actual_output = tool.invoke({"query": "up", "type": "query"}) + twenty_minutes = 20 * 60 + now = datetime.datetime.now(datetime.timezone.utc).timestamp() + actual_output = tool.invoke( + {"query": "up", "start": now - twenty_minutes, "end": now, "step": 1} + ) print(actual_output) assert actual_output parsed_output = json.loads(actual_output) assert parsed_output.get("status") == "success" + assert not parsed_output.get("error_message") From c92a9c00fc52bc9465300b7c6c88743594a281d2 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Fri, 21 Feb 2025 09:48:54 +0100 Subject: [PATCH 06/14] test: add test and comment to openai formatting for toolset types --- holmes/core/openai_formatting.py | 2 ++ tests/test_openai_formatting.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 tests/test_openai_formatting.py diff --git a/holmes/core/openai_formatting.py b/holmes/core/openai_formatting.py index e9f68a04..c218632d 100644 --- a/holmes/core/openai_formatting.py +++ b/holmes/core/openai_formatting.py @@ -1,5 +1,7 @@ import re +# parses both simple types: "int", "array", "string" +# but also arrays of those simpler types: "array[int]", "array[string]", etc.
pattern = r"^(array\[(?P<inner_type>\w+)\])|(?P<simple_type>\w+)$" diff --git a/tests/test_openai_formatting.py b/tests/test_openai_formatting.py new file mode 100644 index 00000000..33eb60ff --- /dev/null +++ b/tests/test_openai_formatting.py @@ -0,0 +1,30 @@ + + +import pytest +from holmes.core.openai_formatting import type_to_open_ai_schema + + +@pytest.mark.parametrize( + "toolset_type, open_ai_type", + [ + ( + "int", + {"type": "int"}, + ), + ( + "string", + {"type": "string"}, + ), + ( + "array[int]", + {"type": "array", "items": {"type": "int"}}, + ), + ( + "array[string]", + {"type": "array", "items": {"type": "string"}}, + ), + ], +) +def test_type_to_open_ai_schema(toolset_type, open_ai_type): + result = type_to_open_ai_schema(toolset_type) + assert result == open_ai_type From bbceaf40842aa3b84b9796b3ec478fd3f1079e3d Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Fri, 21 Feb 2025 10:32:31 +0100 Subject: [PATCH 07/14] feat: tweak the prompt for prometheus queries --- .../prompts/generic_ask_conversation.jinja2 | 11 +- holmes/plugins/toolsets/prometheus.py | 2 +- .../test_case.yaml | 1 - .../execute_prometheus_range_query.txt | 11 + .../execute_prometheus_range_query_2.txt | 11 + .../execute_prometheus_range_query_median.txt | 12 + .../get_current_time.txt | 0 .../helm/Dockerfile | 0 .../helm/app.py | 0 .../helm/build.sh | 0 .../helm/manifest.yaml | 0 .../helm/requirements.txt | 0 .../kubectl_find_resource.txt | 5 + .../list_available_metrics.txt | 0 .../test_case.yaml | 3 +- .../execute_prometheus_range_query.txt | 1100 ------- .../execute_prometheus_range_query_2.txt | 2598 ----------------- tests/test_openai_formatting.py | 2 - 18 files changed, 47 insertions(+), 3709 deletions(-) create mode 100644 tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_2.txt create mode 100644
tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_median.txt rename tests/llm/fixtures/test_ask_holmes/{34_http_latency_graph => 32_http_latency_graph}/get_current_time.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{34_http_latency_graph => 32_http_latency_graph}/helm/Dockerfile (100%) rename tests/llm/fixtures/test_ask_holmes/{34_http_latency_graph => 32_http_latency_graph}/helm/app.py (100%) rename tests/llm/fixtures/test_ask_holmes/{34_http_latency_graph => 32_http_latency_graph}/helm/build.sh (100%) rename tests/llm/fixtures/test_ask_holmes/{34_http_latency_graph => 32_http_latency_graph}/helm/manifest.yaml (100%) rename tests/llm/fixtures/test_ask_holmes/{34_http_latency_graph => 32_http_latency_graph}/helm/requirements.txt (100%) create mode 100644 tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/kubectl_find_resource.txt rename tests/llm/fixtures/test_ask_holmes/{34_http_latency_graph => 32_http_latency_graph}/list_available_metrics.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{34_http_latency_graph => 32_http_latency_graph}/test_case.yaml (63%) delete mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query_2.txt diff --git a/holmes/plugins/prompts/generic_ask_conversation.jinja2 b/holmes/plugins/prompts/generic_ask_conversation.jinja2 index d404ff23..b411dd2a 100644 --- a/holmes/plugins/prompts/generic_ask_conversation.jinja2 +++ b/holmes/plugins/prompts/generic_ask_conversation.jinja2 @@ -20,11 +20,12 @@ Prometheus/PromQL queries ** Avoid using `_bucket` unless you know the bucket's boundaries are configured correctly ** Prefer individual averages like `rate(_sum) / rate(_count)` ** Avoid global averages like `sum(rate(_sum)) / sum(rate(_count))` because it hides data and is not generally informative -* Post processing will parse your response, 
re-run the query from the tool output and create a chart -* Only generate and execute a prometheus query after checking what metrics are available with the `list_available_metrics` tool. Filter as needed. -* Check that any node, service, pod, container, app, namespace, etc. mentioned in the query exist in the kubernetes cluster before making a query. Use any appropriate kubectl tool(s) for this. -* The toolcall will return no data to you. That is expected. Only the user will see the charts -* You MUST get the current time before executing a prometheus range query +* Post processing will parse your response, re-run the query from the tool output and create a chart visible to the user +* Only generate and execute a prometheus query after checking what metrics are available with the `list_available_metrics` tool +* Check that any node, service, pod, container, app, namespace, etc. mentioned in the query exist in the kubernetes cluster before making a query. Use any appropriate kubectl tool(s) for this +* The toolcall will return no data to you. That is expected. You MUST however ensure that the query is successful. +* You can get the current time before executing a prometheus range query +* ALWAYS embed the execution results into your answer Style guide: * Reply with terse output. 
diff --git a/holmes/plugins/toolsets/prometheus.py b/holmes/plugins/toolsets/prometheus.py index e8a3d6c7..228e7d03 100644 --- a/holmes/plugins/toolsets/prometheus.py +++ b/holmes/plugins/toolsets/prometheus.py @@ -71,7 +71,7 @@ def fetch_metadata(url: str) -> dict: def result_has_data(result: dict) -> bool: data = result.get("data", {}) - if data.get("resultType", None) == "vector" and len(data.get("result", [])) > 0: + if len(data.get("result", [])) > 0: return True return False diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml index 45d2c003..e8e5fae8 100644 --- a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml @@ -3,4 +3,3 @@ expected_output: - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "vwJA" } >>. Both tool_name and random_key MUST absolutely be present and the same as mentioned here.' 
evaluation: correctness: 1 -generate_mocks: True diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query.txt new file mode 100644 index 00000000..cca45195 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query.txt @@ -0,0 +1,11 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"http_request_duration_seconds_sum / http_request_duration_seconds_count","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "random_key": "0we9", + "tool_name": "execute_prometheus_range_query", + "description": "Average HTTP request latency for customer-orders-service", + "query": "http_request_duration_seconds_sum / http_request_duration_seconds_count", + "start": "1739705559", + "end": "1739791959", + "step": 300 +} diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_2.txt b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_2.txt new file mode 100644 index 00000000..a0850bfc --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_2.txt @@ -0,0 +1,11 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "random_key": "9kLK", + "tool_name": "execute_prometheus_range_query", + "description": "Average HTTP request latency for customer-orders-service", + "query": "rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / 
rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])", + "start": "1739705559", + "end": "1739791959", + "step": 60 +} diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_median.txt b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_median.txt new file mode 100644 index 00000000..c0cdee82 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_median.txt @@ -0,0 +1,12 @@ +{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"histogram_quantile(0.5, sum(rate(http_request_duration_seconds_bucket{service=\"customer-orders-service\"}[5m])) by (le))","description":"*","start":"*","end":"*","step":"*"}} +{ + "status": "success", + "error_message": null, + "random_key": "FaGs", + "tool_name": "execute_prometheus_range_query", + "description": "Median HTTP request latency for customer-orders-service", + "query": "histogram_quantile(0.5, sum(rate(http_request_duration_seconds_bucket{service=\"customer-orders-service\"}[5m])) by (le))", + "start": "1739705559", + "end": "1739791959", + "step": 300 +} diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/get_current_time.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/get_current_time.txt rename to tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/get_current_time.txt diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/Dockerfile b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/Dockerfile similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/Dockerfile rename to tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/Dockerfile diff --git 
a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/app.py b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/app.py similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/app.py rename to tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/app.py diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/build.sh b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/build.sh similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/build.sh rename to tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/build.sh diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/manifest.yaml b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/manifest.yaml similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/manifest.yaml rename to tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/manifest.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/requirements.txt b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/requirements.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/helm/requirements.txt rename to tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/requirements.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/kubectl_find_resource.txt new file mode 100644 index 00000000..804dbcf2 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/kubectl_find_resource.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"service","keyword":"customer-orders-service"}} +stdout: +default customer-orders-service ClusterIP 10.96.132.215 80/TCP 25d app=customer-orders 
app=customer-orders + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/list_available_metrics.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/list_available_metrics.txt rename to tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/list_available_metrics.txt diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/test_case.yaml similarity index 63% rename from tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/test_case.yaml rename to tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/test_case.yaml index a3739334..8e335408 100644 --- a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/test_case.yaml @@ -1,7 +1,6 @@ -user_prompt: "How is the http request latency for the service customer-orders-service?" +user_prompt: "Show me the http request latency over time for the service customer-orders-service?" expected_output: - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "" } >>.' - - "The root endpoint `/` latency ranges (very roughly) from 5+ to 8+ seconds." 
before_test: kubectl apply -f ./helm/manifest.yaml after_test: kubectl delete -f ./helm/manifest.yaml evaluation: diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query.txt deleted file mode 100644 index 45b22f66..00000000 --- a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query.txt +++ /dev/null @@ -1,1100 +0,0 @@ -{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"http_request_duration_seconds_sum / http_request_duration_seconds_count","description":"*","start":"*","end":"*","step":"*"}} -{ - "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "container": "fastapi-app", - "endpoint": "http", - "handler": "/", - "instance": "10.244.0.19:8000", - "job": "customer-orders-service", - "method": "GET", - "namespace": "default", - "pod": "customer-orders-6f5cbdf85-c5fsf", - "service": "customer-orders-service" - }, - "values": [ - [ - 1739773059, - "7.2587948227500085" - ], - [ - 1739773359, - "8.23126970688889" - ], - [ - 1739773659, - "7.9317453926923145" - ], - [ - 1739773959, - "8.008496379882345" - ], - [ - 1739774259, - "8.053626901636362" - ], - [ - 1739774559, - "7.815565156692309" - ], - [ - 1739774859, - "7.555963071935487" - ], - [ - 1739775159, - "7.5790614398285765" - ], - [ - 1739775459, - "7.732811918500014" - ], - [ - 1739775759, - "7.689560054227289" - ], - [ - 1739776059, - "7.660539889428578" - ], - [ - 1739776359, - "7.630091538603779" - ], - [ - 1739776659, - "7.656471271473686" - ], - [ - 1739776959, - "7.636309512241953" - ], - [ - 1739777259, - "7.583012037212128" - ], - [ - 1739777559, - "7.500178550943661" - ], - [ - 1739777859, - "7.500630043146668" - ], - [ - 1739778159, - "7.557407538824999" - ], - [ - 1739778459, - "7.638421752964276" - ], - [ - 1739778759, - 
"7.625372610382013" - ], - [ - 1739779059, - "7.566496483698924" - ], - [ - 1739779359, - "7.548145681469384" - ], - [ - 1739779659, - "7.5465564971274475" - ], - [ - 1739779959, - "7.601751123707535" - ], - [ - 1739780259, - "7.565906024747731" - ], - [ - 1739780559, - "7.607407520895647" - ], - [ - 1739780859, - "7.574012064941668" - ], - [ - 1739781159, - "7.539496465258071" - ], - [ - 1739781459, - "7.573159430651165" - ], - [ - 1739781759, - "7.571160032458671" - ], - [ - 1739782059, - "7.5619592621898155" - ], - [ - 1739782359, - "7.570613084725389" - ], - [ - 1739782659, - "7.568860758157557" - ], - [ - 1739782959, - "7.543620602437094" - ], - [ - 1739783259, - "7.5233010567161465" - ], - [ - 1739783559, - "7.494653588650015" - ], - [ - 1739783859, - "7.464420700451223" - ], - [ - 1739784159, - "7.427150739071009" - ], - [ - 1739784459, - "7.463726934161848" - ], - [ - 1739784759, - "7.49586881620787" - ], - [ - 1739785059, - "7.474120202379134" - ], - [ - 1739785359, - "7.450943975796812" - ], - [ - 1739785659, - "7.415401043261803" - ], - [ - 1739785959, - "7.445837658071445" - ], - [ - 1739786259, - "7.467061023510009" - ], - [ - 1739786559, - "7.472762682303937" - ], - [ - 1739786859, - "7.442438846483263" - ], - [ - 1739787159, - "7.453061777225362" - ], - [ - 1739787459, - "7.410695965944959" - ], - [ - 1739787759, - "7.430464756954955" - ], - [ - 1739788059, - "7.4475660274229005" - ], - [ - 1739788359, - "7.448570901545438" - ], - [ - 1739788659, - "7.430730616487269" - ], - [ - 1739788959, - "7.4528477457374835" - ], - [ - 1739789259, - "7.441440416938509" - ], - [ - 1739789559, - "7.448793543385554" - ], - [ - 1739789859, - "7.449726605861679" - ], - [ - 1739790159, - "7.460546300430233" - ], - [ - 1739790459, - "7.4650803777862516" - ], - [ - 1739790759, - "7.45649466648687" - ], - [ - 1739791059, - "7.457236310977847" - ], - [ - 1739791359, - "7.461593859447282" - ], - [ - 1739791659, - "7.4641863409714295" - ], - [ - 1739791959, - 
"7.457733998193641" - ] - ] - }, - { - "metric": { - "container": "fastapi-app", - "endpoint": "http", - "handler": "/metrics", - "instance": "10.244.0.19:8000", - "job": "customer-orders-service", - "method": "GET", - "namespace": "default", - "pod": "customer-orders-6f5cbdf85-c5fsf", - "service": "customer-orders-service" - }, - "values": [ - [ - 1739773059, - "0.0011778313823494185" - ], - [ - 1739773359, - "0.0012135840270238879" - ], - [ - 1739773659, - "0.0012112306929768024" - ], - [ - 1739773959, - "0.0012008533701252202" - ], - [ - 1739774259, - "0.001195991505147101" - ], - [ - 1739774559, - "0.0011950833803378216" - ], - [ - 1739774859, - "0.0011962475656896961" - ], - [ - 1739775159, - "0.0012018490413947748" - ], - [ - 1739775459, - "0.0012095391949127807" - ], - [ - 1739775759, - "0.0012050768477123908" - ], - [ - 1739776059, - "0.0012054137350186792" - ], - [ - 1739776359, - "0.001206090597041833" - ], - [ - 1739776659, - "0.001211775904666278" - ], - [ - 1739776959, - "0.0012099143664220488" - ], - [ - 1739777259, - "0.001209277887199517" - ], - [ - 1739777559, - "0.001206946253935319" - ], - [ - 1739777859, - "0.0012048928145299653" - ], - [ - 1739778159, - "0.0012041727450907188" - ], - [ - 1739778459, - "0.0012038800596742055" - ], - [ - 1739778759, - "0.0012027131334943577" - ], - [ - 1739779059, - "0.0012019704820075505" - ], - [ - 1739779359, - "0.001201262989698879" - ], - [ - 1739779659, - "0.001199501343542595" - ], - [ - 1739779959, - "0.0011993036855336462" - ], - [ - 1739780259, - "0.00119945177766277" - ], - [ - 1739780559, - "0.0012001496972885853" - ], - [ - 1739780859, - "0.001200470073553606" - ], - [ - 1739781159, - "0.0012024492333818396" - ], - [ - 1739781459, - "0.0012040872252957117" - ], - [ - 1739781759, - "0.0012049704045202848" - ], - [ - 1739782059, - "0.0012057259781239336" - ], - [ - 1739782359, - "0.0012063729497669208" - ], - [ - 1739782659, - "0.0012076399840166019" - ], - [ - 1739782959, - "0.0012078742991129643" - 
], - [ - 1739783259, - "0.0012052392969895296" - ], - [ - 1739783559, - "0.0012046767015325762" - ], - [ - 1739783859, - "0.0012071719776108071" - ], - [ - 1739784159, - "0.0012076973051504411" - ], - [ - 1739784459, - "0.0012083144446567524" - ], - [ - 1739784759, - "0.0012094431712657034" - ], - [ - 1739785059, - "0.001211459146878997" - ], - [ - 1739785359, - "0.0012136649074057143" - ], - [ - 1739785659, - "0.0012133093249669534" - ], - [ - 1739785959, - "0.0012123688916771459" - ], - [ - 1739786259, - "0.0012125048874028716" - ], - [ - 1739786559, - "0.0012116863064364715" - ], - [ - 1739786859, - "0.0012119315314876766" - ], - [ - 1739787159, - "0.0012129045611326296" - ], - [ - 1739787459, - "0.0012121163132036183" - ], - [ - 1739787759, - "0.0012125796785343247" - ], - [ - 1739788059, - "0.001213379090460214" - ], - [ - 1739788359, - "0.0012147431721285036" - ], - [ - 1739788659, - "0.0012140533344325655" - ], - [ - 1739788959, - "0.0012117559192166662" - ], - [ - 1739789259, - "0.0012106095432976303" - ], - [ - 1739789559, - "0.0012095006302576964" - ], - [ - 1739789859, - "0.001209972794638966" - ], - [ - 1739790159, - "0.0012095507696670438" - ], - [ - 1739790459, - "0.0012096573347496315" - ], - [ - 1739790759, - "0.0012104484912255531" - ], - [ - 1739791059, - "0.0012108398126455613" - ], - [ - 1739791359, - "0.0012106329894833428" - ], - [ - 1739791659, - "0.0012124586849537311" - ], - [ - 1739791959, - "0.001211919155042263" - ] - ] - }, - { - "metric": { - "container": "login-app", - "endpoint": "http", - "handler": "/", - "instance": "10.244.0.30:8000", - "job": "login-app-service", - "method": "GET", - "namespace": "default", - "pod": "login-app-58995d8584-pbv8p", - "service": "login-app-service" - }, - "values": [ - [ - 1739773059, - "2.83138598130303" - ], - [ - 1739773359, - "2.830102092584772" - ], - [ - 1739773659, - "2.8284908548359566" - ], - [ - 1739773959, - "2.8278310807242577" - ], - [ - 1739774259, - "2.828446740489451" - ], - [ - 
1739774559, - "2.828184324408743" - ], - [ - 1739774859, - "2.8279011449346414" - ], - [ - 1739775159, - "2.827529304447884" - ], - [ - 1739775459, - "2.827177012685198" - ], - [ - 1739775759, - "2.8271436622096044" - ], - [ - 1739776059, - "2.827210650151357" - ], - [ - 1739776359, - "2.8275159268187777" - ], - [ - 1739776659, - "2.827450344868665" - ], - [ - 1739776959, - "2.8274680421075318" - ], - [ - 1739777259, - "2.8275885253004827" - ], - [ - 1739777559, - "2.827507495578638" - ], - [ - 1739777859, - "2.827654852458657" - ], - [ - 1739778159, - "2.827741677818121" - ], - [ - 1739778459, - "2.8277080907233025" - ], - [ - 1739778759, - "2.8279315805660103" - ], - [ - 1739779059, - "2.828051686273458" - ], - [ - 1739779359, - "2.8291546968806" - ], - [ - 1739779659, - "2.8291437681633993" - ], - [ - 1739779959, - "2.829080858675334" - ], - [ - 1739780259, - "2.8290324227316668" - ], - [ - 1739780559, - "2.8289147258891867" - ], - [ - 1739780859, - "2.8288079988888084" - ], - [ - 1739781159, - "2.8287009978934834" - ], - [ - 1739781459, - "2.8288963259324067" - ], - [ - 1739781759, - "2.8292331362227507" - ], - [ - 1739782059, - "2.8294056472924414" - ], - [ - 1739782359, - "2.8293562156019734" - ], - [ - 1739782659, - "2.829205154885758" - ], - [ - 1739782959, - "2.8290954958162438" - ], - [ - 1739783259, - "2.8290240288020576" - ], - [ - 1739783559, - "2.828992801825756" - ], - [ - 1739783859, - "2.829000384073736" - ], - [ - 1739784159, - "2.8289273408123683" - ], - [ - 1739784459, - "2.8288087803547852" - ], - [ - 1739784759, - "2.828647169326283" - ], - [ - 1739785059, - "2.8285850495885825" - ], - [ - 1739785359, - "2.828510335493367" - ], - [ - 1739785659, - "2.8285983651139857" - ], - [ - 1739785959, - "2.8285662159537983" - ], - [ - 1739786259, - "2.8288122549132066" - ], - [ - 1739786559, - "2.8290214643176306" - ], - [ - 1739786859, - "2.8291358078182376" - ], - [ - 1739787159, - "2.8290825703515603" - ], - [ - 1739787459, - "2.8290425153097876" - ], 
- [ - 1739787759, - "2.8290476434794325" - ], - [ - 1739788059, - "2.8291700131104025" - ], - [ - 1739788359, - "2.8292411950691903" - ], - [ - 1739788659, - "2.8292403303221807" - ], - [ - 1739788959, - "2.8291497713546416" - ], - [ - 1739789259, - "2.829131832617153" - ], - [ - 1739789559, - "2.829138672372275" - ], - [ - 1739789859, - "2.8291698940982037" - ], - [ - 1739790159, - "2.8291522689544943" - ], - [ - 1739790459, - "2.8290932602137135" - ], - [ - 1739790759, - "2.829067023508567" - ], - [ - 1739791059, - "2.829232057446435" - ], - [ - 1739791359, - "2.8293521537521373" - ], - [ - 1739791659, - "2.8293414115216846" - ], - [ - 1739791959, - "2.8293539812287145" - ] - ] - }, - { - "metric": { - "container": "login-app", - "endpoint": "http", - "handler": "/metrics", - "instance": "10.244.0.30:8000", - "job": "login-app-service", - "method": "GET", - "namespace": "default", - "pod": "login-app-58995d8584-pbv8p", - "service": "login-app-service" - }, - "values": [ - [ - 1739773059, - "0.00846668857143158" - ], - [ - 1739773359, - "0.008678134018348665" - ], - [ - 1739773659, - "0.008588971893490137" - ], - [ - 1739773959, - "0.008546644860262416" - ], - [ - 1739774259, - "0.00877100479930647" - ], - [ - 1739774559, - "0.008492624879654503" - ], - [ - 1739774859, - "0.008520295870417598" - ], - [ - 1739775159, - "0.0085960204584219" - ], - [ - 1739775459, - "0.008748462376179489" - ], - [ - 1739775759, - "0.008707839146004707" - ], - [ - 1739776059, - "0.008802245097065673" - ], - [ - 1739776359, - "0.00884219228630757" - ], - [ - 1739776659, - "0.00880753871780504" - ], - [ - 1739776959, - "0.008836821677914912" - ], - [ - 1739777259, - "0.008843312683905301" - ], - [ - 1739777559, - "0.008805021601678644" - ], - [ - 1739777859, - "0.008850501024768212" - ], - [ - 1739778159, - "0.008829969988763765" - ], - [ - 1739778459, - "0.00887498152169415" - ], - [ - 1739778759, - "0.008862755513872543" - ], - [ - 1739779059, - "0.0088982690792612" - ], - [ - 
1739779359, - "0.008881663406415463" - ], - [ - 1739779659, - "0.008891747513508508" - ], - [ - 1739779959, - "0.008922389587117533" - ], - [ - 1739780259, - "0.008962377049021274" - ], - [ - 1739780559, - "0.008975942372491854" - ], - [ - 1739780859, - "0.008994294058416386" - ], - [ - 1739781159, - "0.008982070990407206" - ], - [ - 1739781459, - "0.0089648725812625" - ], - [ - 1739781759, - "0.008960532272217001" - ], - [ - 1739782059, - "0.008998948832883553" - ], - [ - 1739782359, - "0.009003110165003011" - ], - [ - 1739782659, - "0.009032165633308479" - ], - [ - 1739782959, - "0.009047435979789789" - ], - [ - 1739783259, - "0.009012811550979962" - ], - [ - 1739783559, - "0.00903455379664957" - ], - [ - 1739783859, - "0.008989507687646312" - ], - [ - 1739784159, - "0.009035354254298638" - ], - [ - 1739784459, - "0.009003218899959945" - ], - [ - 1739784759, - "0.009004752629975651" - ], - [ - 1739785059, - "0.009022955156801054" - ], - [ - 1739785359, - "0.00902572053288378" - ], - [ - 1739785659, - "0.00904632557026731" - ], - [ - 1739785959, - "0.009043385511223401" - ], - [ - 1739786259, - "0.009062357688732045" - ], - [ - 1739786559, - "0.009095935643143118" - ], - [ - 1739786859, - "0.009121873600573585" - ], - [ - 1739787159, - "0.009126550629839516" - ], - [ - 1739787459, - "0.009133710421648376" - ], - [ - 1739787759, - "0.009145951813987443" - ], - [ - 1739788059, - "0.009141624673337777" - ], - [ - 1739788359, - "0.009141860745902083" - ], - [ - 1739788659, - "0.009140670097509828" - ], - [ - 1739788959, - "0.009130774197275822" - ], - [ - 1739789259, - "0.009111968951355867" - ], - [ - 1739789559, - "0.009121130800240896" - ], - [ - 1739789859, - "0.009117942878851645" - ], - [ - 1739790159, - "0.009126771455755866" - ], - [ - 1739790459, - "0.009153510467274579" - ], - [ - 1739790759, - "0.009165751174705604" - ], - [ - 1739791059, - "0.009172422976989395" - ], - [ - 1739791359, - "0.009165314803188105" - ], - [ - 1739791659, - "0.009162682029190416" 
- ], - [ - 1739791959, - "0.009169567396190837" - ] - ] - } - ] - }, - "random_key": "0we9", - "tool_name": "execute_prometheus_range_query", - "description": "Average HTTP request latency for customer-orders-service", - "query": "http_request_duration_seconds_sum / http_request_duration_seconds_count", - "start": "1739705559", - "end": "1739791959", - "step": 300 -} diff --git a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query_2.txt b/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query_2.txt deleted file mode 100644 index 080cb1d3..00000000 --- a/tests/llm/fixtures/test_ask_holmes/34_http_latency_graph/execute_prometheus_range_query_2.txt +++ /dev/null @@ -1,2598 +0,0 @@ -{"toolset_name":"prometheus/metrics","tool_name":"execute_prometheus_range_query","match_params":{"query":"rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])","description":"*","start":"*","end":"*","step":"*"}} -{ - "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "container": "fastapi-app", - "endpoint": "http", - "handler": "/", - "instance": "10.244.0.19:8000", - "job": "customer-orders-service", - "method": "GET", - "namespace": "default", - "pod": "customer-orders-6f5cbdf85-c5fsf", - "service": "customer-orders-service" - }, - "values": [ - [ - 1739772879, - "6.006911542000012" - ], - [ - 1739772939, - "6.006911542000012" - ], - [ - 1739772999, - "7.007923973500012" - ], - [ - 1739773059, - "7.007747082000009" - ], - [ - 1739773119, - "7.758122644250001" - ], - [ - 1739773179, - "7.757933481999991" - ], - [ - 1739773239, - "8.258781271749982" - ], - [ - 1739773299, - "8.758789859749996" - ], - [ - 1739773359, - "9.009249614199996" - ], - [ - 1739773419, - "8.759249685" - ], - [ - 1739773479, - "9.009729588249996" - ], - [ - 1739773539, - "8.759213496500024" - 
], - [ - 1739773599, - "7.758259760250013" - ], - [ - 1739773659, - "7.257815685750017" - ], - [ - 1739773719, - "7.508067395250009" - ], - [ - 1739773779, - "7.507443340749944" - ], - [ - 1739773839, - "8.258002217249949" - ], - [ - 1739773899, - "8.208212137399956" - ], - [ - 1739773959, - "8.257937088249943" - ], - [ - 1739774019, - "8.757508732499954" - ], - [ - 1739774079, - "8.256989014999984" - ], - [ - 1739774139, - "8.757990374249971" - ], - [ - 1739774199, - "9.007368194499973" - ], - [ - 1739774259, - "7.756996364250028" - ], - [ - 1739774319, - "7.757246777000034" - ], - [ - 1739774379, - "7.206252581800026" - ], - [ - 1739774439, - "6.2054873940000554" - ], - [ - 1739774499, - "5.505218467750069" - ], - [ - 1739774559, - "6.506225559500024" - ], - [ - 1739774619, - "6.505919401750021" - ], - [ - 1739774679, - "6.506882022250011" - ], - [ - 1739774739, - "6.756950814499987" - ], - [ - 1739774799, - "5.755944083249972" - ], - [ - 1739774859, - "6.0061740067500295" - ], - [ - 1739774919, - "7.257413727750077" - ], - [ - 1739774979, - "6.807173017200056" - ], - [ - 1739775039, - "7.007574977400054" - ], - [ - 1739775099, - "7.507998211250083" - ], - [ - 1739775159, - "7.758073791000015" - ], - [ - 1739775219, - "7.757631947749929" - ], - [ - 1739775279, - "8.00785348324996" - ], - [ - 1739775339, - "8.75813938500005" - ], - [ - 1739775399, - "8.75895610100008" - ], - [ - 1739775459, - "8.509002138000142" - ], - [ - 1739775519, - "8.208447821000117" - ], - [ - 1739775579, - "8.758785697000121" - ], - [ - 1739775639, - "7.75821505099998" - ], - [ - 1739775699, - "7.007337355749996" - ], - [ - 1739775759, - "7.25704141150004" - ], - [ - 1739775819, - "8.006123858000024" - ], - [ - 1739775879, - "8.005737399250052" - ], - [ - 1739775939, - "9.006552918750003" - ], - [ - 1739775999, - "8.205668952799988" - ], - [ - 1739776059, - "7.40516243919992" - ], - [ - 1739776119, - "6.755812964249912" - ], - [ - 1739776179, - "7.256094892499959" - ], - [ - 1739776239, - 
"6.254962020999982" - ], - [ - 1739776299, - "7.506989841250061" - ], - [ - 1739776359, - "7.257099240999992" - ], - [ - 1739776419, - "7.257704500999922" - ], - [ - 1739776479, - "8.008136494249925" - ], - [ - 1739776539, - "8.207737753999936" - ], - [ - 1739776599, - "7.406046824799886" - ], - [ - 1739776659, - "8.006002731999956" - ], - [ - 1739776719, - "7.755197058000022" - ], - [ - 1739776779, - "7.004803537000043" - ], - [ - 1739776839, - "6.505294160750168" - ], - [ - 1739776899, - "7.255810632500243" - ], - [ - 1739776959, - "7.506593340000222" - ], - [ - 1739777019, - "7.2567012235001584" - ], - [ - 1739777079, - "7.807419219000075" - ], - [ - 1739777139, - "7.806900095199854" - ], - [ - 1739777199, - "7.007337890399866" - ], - [ - 1739777259, - "6.756901174249833" - ], - [ - 1739777319, - "7.007034794499987" - ], - [ - 1739777379, - "5.755951220500038" - ], - [ - 1739777439, - "5.506015439000066" - ], - [ - 1739777499, - "6.507039589749866" - ], - [ - 1739777559, - "6.50689723499977" - ], - [ - 1739777619, - "6.757224703249676" - ], - [ - 1739777679, - "6.807423797599767" - ], - [ - 1739777739, - "7.408275515799868" - ], - [ - 1739777799, - "7.007809097249947" - ], - [ - 1739777859, - "7.50864402975003" - ], - [ - 1739777919, - "8.509629683750063" - ], - [ - 1739777979, - "8.509553631500012" - ], - [ - 1739778039, - "8.008584409499917" - ], - [ - 1739778099, - "8.258727426499945" - ], - [ - 1739778159, - "8.008535119000044" - ], - [ - 1739778219, - "8.408937905199855" - ], - [ - 1739778279, - "8.759193389999837" - ], - [ - 1739778339, - "9.509662840999908" - ], - [ - 1739778399, - "9.259424023999827" - ], - [ - 1739778459, - "9.25870603574981" - ], - [ - 1739778519, - "9.258579091250112" - ], - [ - 1739778579, - "8.00799186925019" - ], - [ - 1739778639, - "8.008101848250135" - ], - [ - 1739778699, - "7.607169210600114" - ], - [ - 1739778759, - "7.406147014999988" - ], - [ - 1739778819, - "6.755173450749909" - ], - [ - 1739778879, - "8.006494438499885" - 
], - [ - 1739778939, - "7.25498146699988" - ], - [ - 1739778999, - "7.005592500750026" - ], - [ - 1739779059, - "6.256502665000198" - ], - [ - 1739779119, - "5.255490649499961" - ], - [ - 1739779179, - "6.257142547749936" - ], - [ - 1739779239, - "6.0062971023999125" - ], - [ - 1739779299, - "6.205825084799835" - ], - [ - 1739779359, - "7.206820759999936" - ], - [ - 1739779419, - "7.506797687250128" - ], - [ - 1739779479, - "7.757157862500208" - ], - [ - 1739779539, - "8.008148490500162" - ], - [ - 1739779599, - "8.758846582750039" - ], - [ - 1739779659, - "7.507621480750004" - ], - [ - 1739779719, - "6.756240088249796" - ], - [ - 1739779779, - "7.4070246615998245" - ], - [ - 1739779839, - "8.207858220999878" - ], - [ - 1739779899, - "8.008244007249914" - ], - [ - 1739779959, - "9.009214101499765" - ], - [ - 1739780019, - "9.009816711249869" - ], - [ - 1739780079, - "8.258543936749902" - ], - [ - 1739780139, - "7.50785377599982" - ], - [ - 1739780199, - "6.506054522999876" - ], - [ - 1739780259, - "6.755486902249913" - ], - [ - 1739780319, - "7.406415916200057" - ], - [ - 1739780379, - "7.807376683200163" - ], - [ - 1739780439, - "8.007149607250312" - ], - [ - 1739780499, - "8.257913026250435" - ], - [ - 1739780559, - "8.759074039000325" - ], - [ - 1739780619, - "8.759044418250369" - ], - [ - 1739780679, - "8.50753272650013" - ], - [ - 1739780739, - "8.256971091999958" - ], - [ - 1739780799, - "7.255919399500044" - ], - [ - 1739780859, - "6.805916578000142" - ], - [ - 1739780919, - "6.005136625199884" - ], - [ - 1739780979, - "5.505463550249942" - ], - [ - 1739781039, - "6.25496485650001" - ], - [ - 1739781099, - "6.004087576500297" - ], - [ - 1739781159, - "6.504028474750157" - ], - [ - 1739781219, - "6.50411246475005" - ], - [ - 1739781279, - "7.006252243750168" - ], - [ - 1739781339, - "8.25810831050012" - ], - [ - 1739781399, - "8.508200821750052" - ], - [ - 1739781459, - "8.408000972399895" - ], - [ - 1739781519, - "9.008388794250095" - ], - [ - 1739781579, - 
"7.756952079000257" - ], - [ - 1739781639, - "7.2567249945000185" - ], - [ - 1739781699, - "7.506822841750362" - ], - [ - 1739781759, - "7.506679440750759" - ], - [ - 1739781819, - "8.507886307500485" - ], - [ - 1739781879, - "7.756432367500565" - ], - [ - 1739781939, - "7.606200286200669" - ], - [ - 1739781999, - "7.406152514800465" - ], - [ - 1739782059, - "7.256033650750368" - ], - [ - 1739782119, - "6.755099485000727" - ], - [ - 1739782179, - "6.755763347500732" - ], - [ - 1739782239, - "6.756299932000275" - ], - [ - 1739782299, - "7.2569120345001465" - ], - [ - 1739782359, - "8.008266116499726" - ], - [ - 1739782419, - "9.00908641074966" - ], - [ - 1739782479, - "8.507753000999855" - ], - [ - 1739782539, - "8.60804555839968" - ], - [ - 1739782599, - "8.257896979999714" - ], - [ - 1739782659, - "7.506653164999534" - ], - [ - 1739782719, - "6.505579416999353" - ], - [ - 1739782779, - "6.50559204999945" - ], - [ - 1739782839, - "6.5056406374997096" - ], - [ - 1739782899, - "7.0066210424997735" - ], - [ - 1739782959, - "7.2570096924996506" - ], - [ - 1739783019, - "6.806151251599658" - ], - [ - 1739783079, - "7.4069048493998695" - ], - [ - 1739783139, - "7.006278527249833" - ], - [ - 1739783199, - "7.0063131767501545" - ], - [ - 1739783259, - "6.756238205750378" - ], - [ - 1739783319, - "8.007956536500387" - ], - [ - 1739783379, - "7.507973012250204" - ], - [ - 1739783439, - "6.757033744750061" - ], - [ - 1739783499, - "7.006845554750271" - ], - [ - 1739783559, - "6.606582078599967" - ], - [ - 1739783619, - "5.805888140999741" - ], - [ - 1739783679, - "5.6053118285995875" - ], - [ - 1739783739, - "6.005154508799387" - ], - [ - 1739783799, - "6.0050672702491275" - ], - [ - 1739783859, - "6.255105172499498" - ], - [ - 1739783919, - "6.254896438999822" - ], - [ - 1739783979, - "6.2547545089996675" - ], - [ - 1739784039, - "6.5050586742499945" - ], - [ - 1739784099, - "6.504393726249873" - ], - [ - 1739784159, - "6.254548460000023" - ], - [ - 1739784219, - 
"6.8054298293998725" - ], - [ - 1739784279, - "7.2066028612000075" - ], - [ - 1739784339, - "6.756566345749889" - ], - [ - 1739784399, - "7.758117361249788" - ], - [ - 1739784459, - "9.009071176749787" - ], - [ - 1739784519, - "9.008893714999886" - ], - [ - 1739784579, - "9.509336027500012" - ], - [ - 1739784639, - "9.50848044175018" - ], - [ - 1739784699, - "8.507456790250217" - ], - [ - 1739784759, - "8.607977935000235" - ], - [ - 1739784819, - "8.507911053750377" - ], - [ - 1739784879, - "7.757198337000318" - ], - [ - 1739784939, - "6.507019269750344" - ], - [ - 1739784999, - "7.0070338497503135" - ], - [ - 1739785059, - "6.506306887000392" - ], - [ - 1739785119, - "6.506179266500567" - ], - [ - 1739785179, - "7.006924671500656" - ], - [ - 1739785239, - "7.508009333750579" - ], - [ - 1739785299, - "7.007598072800465" - ], - [ - 1739785359, - "6.607329328200285" - ], - [ - 1739785419, - "6.40648359600018" - ], - [ - 1739785479, - "6.2560877297501065" - ], - [ - 1739785539, - "5.004776942249919" - ], - [ - 1739785599, - "5.754398218749884" - ], - [ - 1739785659, - "5.753768947250137" - ], - [ - 1739785719, - "6.504681838000124" - ], - [ - 1739785779, - "7.505842333000146" - ], - [ - 1739785839, - "7.75704187600013" - ], - [ - 1739785899, - "7.807142460999966" - ], - [ - 1739785959, - "8.608516343799783" - ], - [ - 1739786019, - "8.759091488749618" - ], - [ - 1739786079, - "8.257360965999851" - ], - [ - 1739786139, - "8.257390547249543" - ], - [ - 1739786199, - "8.758021212999665" - ], - [ - 1739786259, - "8.50700592999965" - ], - [ - 1739786319, - "8.007658972999252" - ], - [ - 1739786379, - "8.007929957999659" - ], - [ - 1739786439, - "7.807287177800025" - ], - [ - 1739786499, - "7.256592106250082" - ], - [ - 1739786559, - "7.757845622000332" - ], - [ - 1739786619, - "7.757812725500571" - ], - [ - 1739786679, - "7.255932676250268" - ], - [ - 1739786739, - "6.505975235000278" - ], - [ - 1739786799, - "6.254746230999899" - ], - [ - 1739786859, - "6.505287912749736" 
- ], - [ - 1739786919, - "6.605684546399789" - ], - [ - 1739786979, - "7.4076184249999635" - ], - [ - 1739787039, - "7.607331468999837" - ], - [ - 1739787099, - "7.257614290499987" - ], - [ - 1739787159, - "8.008109908500046" - ], - [ - 1739787219, - "7.507706260250415" - ], - [ - 1739787279, - "6.506457530999796" - ], - [ - 1739787339, - "6.256748055249773" - ], - [ - 1739787399, - "5.255931343499925" - ], - [ - 1739787459, - "5.755976384749374" - ], - [ - 1739787519, - "6.4056667805994945" - ], - [ - 1739787579, - "6.205686133399649" - ], - [ - 1739787639, - "7.206648664799649" - ], - [ - 1739787699, - "7.756845224499557" - ], - [ - 1739787759, - "8.507863866999742" - ], - [ - 1739787819, - "8.008155790499586" - ], - [ - 1739787879, - "9.008084149249498" - ], - [ - 1739787939, - "8.50700700724974" - ], - [ - 1739787999, - "8.006739632249719" - ], - [ - 1739788059, - "8.206862436199662" - ], - [ - 1739788119, - "8.50717903074974" - ], - [ - 1739788179, - "8.506177447499795" - ], - [ - 1739788239, - "8.757055432499783" - ], - [ - 1739788299, - "8.00633324024966" - ], - [ - 1739788359, - "7.505597507999482" - ], - [ - 1739788419, - "6.756879635249788" - ], - [ - 1739788479, - "5.755885748249511" - ], - [ - 1739788539, - "6.256382487249539" - ], - [ - 1739788599, - "6.606263282999498" - ], - [ - 1739788659, - "6.606509446799827" - ], - [ - 1739788719, - "6.756361670499699" - ], - [ - 1739788779, - "7.256847274499705" - ], - [ - 1739788839, - "8.006612696499815" - ], - [ - 1739788899, - "8.507642182500149" - ], - [ - 1739788959, - "8.757758371500131" - ], - [ - 1739789019, - "8.257223546750083" - ], - [ - 1739789079, - "7.757701622000241" - ], - [ - 1739789139, - "7.6077026810002275" - ], - [ - 1739789199, - "7.006821810800101" - ], - [ - 1739789259, - "6.757000689000051" - ], - [ - 1739789319, - "6.756931215750228" - ], - [ - 1739789379, - "6.756295348000094" - ], - [ - 1739789439, - "7.256105054250838" - ], - [ - 1739789499, - "7.50702966300105" - ], - [ - 
1739789559, - "8.508131645501635" - ], - [ - 1739789619, - "8.007829722001588" - ], - [ - 1739789679, - "8.208088049801153" - ], - [ - 1739789739, - "7.407878529001027" - ], - [ - 1739789799, - "7.257548537751062" - ], - [ - 1739789859, - "7.507809745000486" - ], - [ - 1739789919, - "8.259023528999933" - ], - [ - 1739789979, - "8.25906597499943" - ], - [ - 1739790039, - "9.510319727248316" - ], - [ - 1739790099, - "8.759468004248447" - ], - [ - 1739790159, - "7.757524089499383" - ], - [ - 1739790219, - "7.807681879599112" - ], - [ - 1739790279, - "7.207106722399475" - ], - [ - 1739790339, - "6.506155116249829" - ], - [ - 1739790399, - "7.256610873499994" - ], - [ - 1739790459, - "7.757528367249506" - ], - [ - 1739790519, - "7.006904954750098" - ], - [ - 1739790579, - "6.756148491000204" - ], - [ - 1739790639, - "6.756098465749346" - ], - [ - 1739790699, - "7.406681810799636" - ], - [ - 1739790759, - "7.0066033943992805" - ], - [ - 1739790819, - "7.406769480399088" - ], - [ - 1739790879, - "8.00761447449895" - ], - [ - 1739790939, - "7.507396508249258" - ], - [ - 1739790999, - "7.006638751249739" - ], - [ - 1739791059, - "7.506741080750544" - ], - [ - 1739791119, - "7.756516784001178" - ], - [ - 1739791179, - "7.255868693751836" - ], - [ - 1739791239, - "7.756859645501208" - ], - [ - 1739791299, - "7.606640099001378" - ], - [ - 1739791359, - "7.756817768251494" - ], - [ - 1739791419, - "7.757055970751026" - ], - [ - 1739791479, - "7.756929871250577" - ], - [ - 1739791539, - "6.755962184750387" - ], - [ - 1739791599, - "7.256807451999521" - ], - [ - 1739791659, - "7.506789670249418" - ], - [ - 1739791719, - "8.507846381248783" - ], - [ - 1739791779, - "9.007917742498648" - ], - [ - 1739791839, - "8.206929353998566" - ], - [ - 1739791899, - "7.40618286619865" - ], - [ - 1739791959, - "7.006070003748391" - ] - ] - }, - { - "metric": { - "container": "fastapi-app", - "endpoint": "http", - "handler": "/metrics", - "instance": "10.244.0.19:8000", - "job": 
"customer-orders-service", - "method": "GET", - "namespace": "default", - "pod": "customer-orders-6f5cbdf85-c5fsf", - "service": "customer-orders-service" - }, - "values": [ - [ - 1739772879, - "0.0012452383865412821" - ], - [ - 1739772939, - "0.0011021824072294769" - ], - [ - 1739772999, - "0.0011294081469067538" - ], - [ - 1739773059, - "0.001135836655142644" - ], - [ - 1739773119, - "0.0011470835526306712" - ], - [ - 1739773179, - "0.00115186292105525" - ], - [ - 1739773239, - "0.0011534796842116329" - ], - [ - 1739773299, - "0.0011486553157887532" - ], - [ - 1739773359, - "0.0012556393421005538" - ], - [ - 1739773419, - "0.001283323973672467" - ], - [ - 1739773479, - "0.0012944506578827185" - ], - [ - 1739773539, - "0.0012690807105137105" - ], - [ - 1739773599, - "0.0012868301052435525" - ], - [ - 1739773659, - "0.0012122121842020007" - ], - [ - 1739773719, - "0.001191606684199685" - ], - [ - 1739773779, - "0.0012086908157824068" - ], - [ - 1739773839, - "0.001214202447368064" - ], - [ - 1739773899, - "0.0011817770263178316" - ], - [ - 1739773959, - "0.0011689156315780659" - ], - [ - 1739774019, - "0.0011344907631526343" - ], - [ - 1739774079, - "0.0011330818420889812" - ], - [ - 1739774139, - "0.0011494284736595784" - ], - [ - 1739774199, - "0.0011701677631328108" - ], - [ - 1739774259, - "0.0011785201841899163" - ], - [ - 1739774319, - "0.0012201845263103007" - ], - [ - 1739774379, - "0.0012404352105357629" - ], - [ - 1739774439, - "0.0012217556052710756" - ], - [ - 1739774499, - "0.0012066817368553242" - ], - [ - 1739774559, - "0.0011923439210656732" - ], - [ - 1739774619, - "0.0011694210263254047" - ], - [ - 1739774679, - "0.0011751934736917303" - ], - [ - 1739774739, - "0.0011791902105343117" - ], - [ - 1739774799, - "0.001175995447367409" - ], - [ - 1739774859, - "0.001187559289471545" - ], - [ - 1739774919, - "0.0011843552104940163" - ], - [ - 1739774979, - "0.0011656749210380056" - ], - [ - 1739775039, - "0.0011786240789110466" - ], - [ - 1739775099, - 
"0.0012222277105018674" - ], - [ - 1739775159, - "0.0012404070789312084" - ], - [ - 1739775219, - "0.0012831252368438358" - ], - [ - 1739775279, - "0.0012967554736836684" - ], - [ - 1739775339, - "0.001318222631605175" - ], - [ - 1739775399, - "0.0013250136578933864" - ], - [ - 1739775459, - "0.0012612806053048666" - ], - [ - 1739775519, - "0.0012317406842492727" - ], - [ - 1739775579, - "0.0012117342105404149" - ], - [ - 1739775639, - "0.0011665133684250336" - ], - [ - 1739775699, - "0.0011342130263098" - ], - [ - 1739775759, - "0.0011617333420992125" - ], - [ - 1739775819, - "0.001180116421042648" - ], - [ - 1739775879, - "0.0011980422631567259" - ], - [ - 1739775939, - "0.0011673096315749525" - ], - [ - 1739775999, - "0.0012106526315787242" - ], - [ - 1739776059, - "0.001207485263158129" - ], - [ - 1739776119, - "0.0012190188947083936" - ], - [ - 1739776179, - "0.0012268279736441685" - ], - [ - 1739776239, - "0.0011919777368084874" - ], - [ - 1739776299, - "0.0011854715525930115" - ], - [ - 1739776359, - "0.0012032467894689607" - ], - [ - 1739776419, - "0.00119225228949242" - ], - [ - 1739776479, - "0.0012415669737038115" - ], - [ - 1739776539, - "0.00127350868423922" - ], - [ - 1739776599, - "0.0013054765263404988" - ], - [ - 1739776659, - "0.001290266236848861" - ], - [ - 1739776719, - "0.001291975842138383" - ], - [ - 1739776779, - "0.0012385826842450784" - ], - [ - 1739776839, - "0.0012258659473780807" - ], - [ - 1739776899, - "0.0011838809210339126" - ], - [ - 1739776959, - "0.0011804123947082475" - ], - [ - 1739777019, - "0.0011929527368130563" - ], - [ - 1739777079, - "0.0012255948946451672" - ], - [ - 1739777139, - "0.0012238133946610966" - ], - [ - 1739777199, - "0.001216830499958489" - ], - [ - 1739777259, - "0.0011892045262787798" - ], - [ - 1739777319, - "0.0011627112104705653" - ], - [ - 1739777379, - "0.0011304774210370132" - ], - [ - 1739777439, - "0.0011219362894687372" - ], - [ - 1739777499, - "0.0011565576578265646" - ], - [ - 1739777559, - 
"0.0011767267631177288" - ], - [ - 1739777619, - "0.001203721473613864" - ], - [ - 1739777679, - "0.0012196116315288618" - ], - [ - 1739777739, - "0.001185833736790779" - ], - [ - 1739777799, - "0.0011569347631465413" - ], - [ - 1739777859, - "0.0011834574736559788" - ], - [ - 1739777919, - "0.001169089447366308" - ], - [ - 1739777979, - "0.001181287763134414" - ], - [ - 1739778039, - "0.0011950855789205218" - ], - [ - 1739778099, - "0.0011956171315911264" - ], - [ - 1739778159, - "0.001189542605324708" - ], - [ - 1739778219, - "0.0012056763421720747" - ], - [ - 1739778279, - "0.0012039200263835052" - ], - [ - 1739778339, - "0.0011949763158650296" - ], - [ - 1739778399, - "0.0011846133158398612" - ], - [ - 1739778459, - "0.0012063784210544037" - ], - [ - 1739778519, - "0.0012242177368343137" - ], - [ - 1739778579, - "0.0012164939473797475" - ], - [ - 1739778639, - "0.001226517500000275" - ], - [ - 1739778699, - "0.0012087204473860574" - ], - [ - 1739778759, - "0.0011678999999994151" - ], - [ - 1739778819, - "0.0011687038947074094" - ], - [ - 1739778879, - "0.0011970142631023343" - ], - [ - 1739778939, - "0.0011737270788934869" - ], - [ - 1739778999, - "0.001189326684174088" - ], - [ - 1739779059, - "0.001190344500005985" - ], - [ - 1739779119, - "0.0011516685526418624" - ], - [ - 1739779179, - "0.0011230707105871834" - ], - [ - 1739779239, - "0.0011881868421800103" - ], - [ - 1739779299, - "0.0011840153421728342" - ], - [ - 1739779359, - "0.0011781441053244632" - ], - [ - 1739779419, - "0.001188619026379456" - ], - [ - 1739779479, - "0.0011691458684684843" - ], - [ - 1739779539, - "0.0011576887631427348" - ], - [ - 1739779599, - "0.001153810184237763" - ], - [ - 1739779659, - "0.0011647171052955902" - ], - [ - 1739779719, - "0.0011839855263276963" - ], - [ - 1739779779, - "0.001153958736889954" - ], - [ - 1739779839, - "0.001163187815875158" - ], - [ - 1739779899, - "0.0011844375000548194" - ], - [ - 1739779959, - "0.001188830000046437" - ], - [ - 1739780019, - 
"0.0012252005000223823" - ], - [ - 1739780079, - "0.0012455845263072036" - ], - [ - 1739780139, - "0.0012380379473478041" - ], - [ - 1739780199, - "0.0012396869210054941" - ], - [ - 1739780259, - "0.001196281552582054" - ], - [ - 1739780319, - "0.0011970753683654995" - ], - [ - 1739780379, - "0.0011913292631489686" - ], - [ - 1739780439, - "0.0012147320000239184" - ], - [ - 1739780499, - "0.0012132596315687374" - ], - [ - 1739780559, - "0.0012121927368417872" - ], - [ - 1739780619, - "0.001186878552624349" - ], - [ - 1739780679, - "0.001200178842066778" - ], - [ - 1739780739, - "0.0011874605789240664" - ], - [ - 1739780799, - "0.001193677157899449" - ], - [ - 1739780859, - "0.0012063521841905879" - ], - [ - 1739780919, - "0.0012312662106045247" - ], - [ - 1739780979, - "0.0012181446315097624" - ], - [ - 1739781039, - "0.0012645707104303782" - ], - [ - 1739781099, - "0.0012476974998207877" - ], - [ - 1739781159, - "0.0012539288681613183" - ], - [ - 1739781219, - "0.0012483303418392958" - ], - [ - 1739781279, - "0.001235638394638571" - ], - [ - 1739781339, - "0.001250489815615047" - ], - [ - 1739781399, - "0.0012529362894910215" - ], - [ - 1739781459, - "0.0012530619474809528" - ], - [ - 1739781519, - "0.0012655263948736797" - ], - [ - 1739781579, - "0.0012705405264760452" - ], - [ - 1739781639, - "0.0012397417896959507" - ], - [ - 1739781699, - "0.0012277347370017147" - ], - [ - 1739781759, - "0.0012296958159487602" - ], - [ - 1739781819, - "0.0012108243686031383" - ], - [ - 1739781879, - "0.0012054235001797726" - ], - [ - 1739781939, - "0.001219491000244345" - ], - [ - 1739781999, - "0.0012194646317431378" - ], - [ - 1739782059, - "0.0012307463159327437" - ], - [ - 1739782119, - "0.0012429551843045996" - ], - [ - 1739782179, - "0.0012631387631987117" - ], - [ - 1739782239, - "0.001213865263085417" - ], - [ - 1739782299, - "0.001223264657758256" - ], - [ - 1739782359, - "0.0012216671578387734" - ], - [ - 1739782419, - "0.001198508289522387" - ], - [ - 1739782479, - 
"0.0012054404736964465" - ], - [ - 1739782539, - "0.0012257149735791916" - ], - [ - 1739782599, - "0.0012441268156276476" - ], - [ - 1739782659, - "0.0012549833156395164" - ], - [ - 1739782719, - "0.0012481877892672568" - ], - [ - 1739782779, - "0.0012525283945203562" - ], - [ - 1739782839, - "0.0012412602893738564" - ], - [ - 1739782899, - "0.0012222647893921882" - ], - [ - 1739782959, - "0.0012127628421551074" - ], - [ - 1739783019, - "0.001189594605373139" - ], - [ - 1739783079, - "0.001171396500180172" - ], - [ - 1739783139, - "0.0011822708687472378" - ], - [ - 1739783199, - "0.0011516235528305466" - ], - [ - 1739783259, - "0.001123888026373415" - ], - [ - 1739783319, - "0.0011338010263946335" - ], - [ - 1739783379, - "0.0011175123157188768" - ], - [ - 1739783439, - "0.001101427947201285" - ], - [ - 1739783499, - "0.0011359339209536157" - ], - [ - 1739783559, - "0.0011929987630011102" - ], - [ - 1739783619, - "0.0011781149472115335" - ], - [ - 1739783679, - "0.0011959323946460867" - ], - [ - 1739783739, - "0.001272350315766265" - ], - [ - 1739783799, - "0.0013101245525544262" - ], - [ - 1739783859, - "0.001303722921061547" - ], - [ - 1739783919, - "0.0013194413947865923" - ], - [ - 1739783979, - "0.0013447907631416456" - ], - [ - 1739784039, - "0.0012852255262930012" - ], - [ - 1739784099, - "0.0012328958946454804" - ], - [ - 1739784159, - "0.0012187291578907445" - ], - [ - 1739784219, - "0.0012177092105144093" - ], - [ - 1739784279, - "0.0012003229737130917" - ], - [ - 1739784339, - "0.0011935900263683593" - ], - [ - 1739784399, - "0.001217946236800727" - ], - [ - 1739784459, - "0.0012278380789116216" - ], - [ - 1739784519, - "0.0012615707631228265" - ], - [ - 1739784579, - "0.0012357461314662175" - ], - [ - 1739784639, - "0.001244307973617029" - ], - [ - 1739784699, - "0.0012131706052969093" - ], - [ - 1739784759, - "0.0012251488684629102" - ], - [ - 1739784819, - "0.0012695369474673306" - ], - [ - 1739784879, - "0.0012991347634133967" - ], - [ - 1739784939, 
- "0.0012977402107265477" - ], - [ - 1739784999, - "0.0013180940263976624" - ], - [ - 1739785059, - "0.0012734894737245906" - ], - [ - 1739785119, - "0.001295302315779018" - ], - [ - 1739785179, - "0.0013156976578293802" - ], - [ - 1739785239, - "0.0013025044736423297" - ], - [ - 1739785299, - "0.0012712879473072366" - ], - [ - 1739785359, - "0.0012884562103959308" - ], - [ - 1739785419, - "0.001265879157759436" - ], - [ - 1739785479, - "0.0012634348419229163" - ], - [ - 1739785539, - "0.0012671580263103302" - ], - [ - 1739785599, - "0.0012402374472194901" - ], - [ - 1739785659, - "0.0011917212104411814" - ], - [ - 1739785719, - "0.0011627890263346335" - ], - [ - 1739785779, - "0.00114936586841826" - ], - [ - 1739785839, - "0.0011621932894501552" - ], - [ - 1739785899, - "0.0011847695265092431" - ], - [ - 1739785959, - "0.0011747959475350827" - ], - [ - 1739786019, - "0.0012058744212864487" - ], - [ - 1739786079, - "0.0012175978686891058" - ], - [ - 1739786139, - "0.0012239262633125978" - ], - [ - 1739786199, - "0.0011931712368478703" - ], - [ - 1739786259, - "0.0012284960525664101" - ], - [ - 1739786319, - "0.001194044868354563" - ], - [ - 1739786379, - "0.0011821863683603862" - ], - [ - 1739786439, - "0.0011616816580403552" - ], - [ - 1739786499, - "0.001164486473786191" - ], - [ - 1739786559, - "0.0011737896579928162" - ], - [ - 1739786619, - "0.0011552616317299237" - ], - [ - 1739786679, - "0.0012214830000486566" - ], - [ - 1739786739, - "0.0012242889474931105" - ], - [ - 1739786799, - "0.0012133733158694474" - ], - [ - 1739786859, - "0.001233963789577262" - ], - [ - 1739786919, - "0.00123786657896792" - ], - [ - 1739786979, - "0.0012523807895137762" - ], - [ - 1739787039, - "0.0012765136579649772" - ], - [ - 1739787099, - "0.0012762117895411059" - ], - [ - 1739787159, - "0.0012665708947775113" - ], - [ - 1739787219, - "0.0012451316578287667" - ], - [ - 1739787279, - "0.0011991776051026566" - ], - [ - 1739787339, - "0.0011991311839665286" - ], - [ - 1739787399, 
- "0.0011673921050092567" - ], - [ - 1739787459, - "0.0011733921050778773" - ], - [ - 1739787519, - "0.0011747593682325953" - ], - [ - 1739787579, - "0.0011590424209268683" - ], - [ - 1739787639, - "0.0011856463157336723" - ], - [ - 1739787699, - "0.0012204303420252183" - ], - [ - 1739787759, - "0.0012336861578468845" - ], - [ - 1739787819, - "0.0012438074736564884" - ], - [ - 1739787879, - "0.0012575933419851169" - ], - [ - 1739787939, - "0.0012748296841987449" - ], - [ - 1739787999, - "0.0012548244473548132" - ], - [ - 1739788059, - "0.001256918421045396" - ], - [ - 1739788119, - "0.0012095183158300257" - ], - [ - 1739788179, - "0.0012353286052817374" - ], - [ - 1739788239, - "0.0012623579737249344" - ], - [ - 1739788299, - "0.0012810851577868512" - ], - [ - 1739788359, - "0.0013011450525817435" - ], - [ - 1739788419, - "0.0012840925261403562" - ], - [ - 1739788479, - "0.0012804349998508482" - ], - [ - 1739788539, - "0.0012128312366353623" - ], - [ - 1739788599, - "0.0011838883683594759" - ], - [ - 1739788659, - "0.0011840583420053008" - ], - [ - 1739788719, - "0.0011833225526061142" - ], - [ - 1739788779, - "0.0011513764736120814" - ], - [ - 1739788839, - "0.0011467447368072737" - ], - [ - 1739788899, - "0.0011199430526080264" - ], - [ - 1739788959, - "0.0010943155790109334" - ], - [ - 1739789019, - "0.0010856926052694785" - ], - [ - 1739789079, - "0.0011163637107433474" - ], - [ - 1739789139, - "0.0011230523160089718" - ], - [ - 1739789199, - "0.0011303285527422924" - ], - [ - 1739789259, - "0.0011504116316102323" - ], - [ - 1739789319, - "0.0011616775262537932" - ], - [ - 1739789379, - "0.0011509905263282186" - ], - [ - 1739789439, - "0.001135927184134887" - ], - [ - 1739789499, - "0.0011405647104478895" - ], - [ - 1739789559, - "0.001154124184196055" - ], - [ - 1739789619, - "0.0011711976843032873" - ], - [ - 1739789679, - "0.0011812759476015342" - ], - [ - 1739789739, - "0.0012035877634365246" - ], - [ - 1739789799, - "0.001211816658101988" - ], - [ - 
1739789859, - "0.0012147785529171078" - ], - [ - 1739789919, - "0.0012267888684475204" - ], - [ - 1739789979, - "0.0012007500263280233" - ], - [ - 1739790039, - "0.001224611894557663" - ], - [ - 1739790099, - "0.0011709251315264017" - ], - [ - 1739790159, - "0.001179241500051974" - ], - [ - 1739790219, - "0.0011746471315602417" - ], - [ - 1739790279, - "0.0011857904475235906" - ], - [ - 1739790339, - "0.0012106941315945924" - ], - [ - 1739790399, - "0.0012362302629634013" - ], - [ - 1739790459, - "0.0012169124472022398" - ], - [ - 1739790519, - "0.0012604539209094484" - ], - [ - 1739790579, - "0.001251737526074162" - ], - [ - 1739790639, - "0.001240572710326217" - ], - [ - 1739790699, - "0.0012782500789877591" - ], - [ - 1739790759, - "0.0012499899472267744" - ], - [ - 1739790819, - "0.0012627959471214316" - ], - [ - 1739790879, - "0.0012542925261713159" - ], - [ - 1739790939, - "0.0012139848153502066" - ], - [ - 1739790999, - "0.0011975972098106898" - ], - [ - 1739791059, - "0.0012375059469425316" - ], - [ - 1739791119, - "0.0012196287627471304" - ], - [ - 1739791179, - "0.0012414592366487914" - ], - [ - 1739791239, - "0.0012427445787915002" - ], - [ - 1739791299, - "0.0012299963950920898" - ], - [ - 1739791359, - "0.001193801684406169" - ], - [ - 1739791419, - "0.0012000323949905578" - ], - [ - 1739791479, - "0.0012537508420408764" - ], - [ - 1739791539, - "0.0012653948946727293" - ], - [ - 1739791599, - "0.00131244555239774" - ], - [ - 1739791659, - "0.0013285718681512852" - ], - [ - 1739791719, - "0.0013191252363819665" - ], - [ - 1739791779, - "0.0012512513419910406" - ], - [ - 1739791839, - "0.0012393901579343947" - ], - [ - 1739791899, - "0.001182507684509466" - ], - [ - 1739791959, - "0.0011744442896375212" - ] - ] - } - ] - }, - "random_key": "9kLK", - "tool_name": "execute_prometheus_range_query", - "description": "Average HTTP request latency for customer-orders-service", - "query": 
"rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])", - "start": "1739705559", - "end": "1739791959", - "step": 60 -} diff --git a/tests/test_openai_formatting.py b/tests/test_openai_formatting.py index 33eb60ff..05240c0c 100644 --- a/tests/test_openai_formatting.py +++ b/tests/test_openai_formatting.py @@ -1,5 +1,3 @@ - - import pytest from holmes.core.openai_formatting import type_to_open_ai_schema From 2b3a55a5b90b3ca813d6625b00fa573e4654668c Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Fri, 21 Feb 2025 10:35:06 +0100 Subject: [PATCH 08/14] doc: add link to docs for datetime toolset --- holmes/plugins/toolsets/datetime.py | 1 + 1 file changed, 1 insertion(+) diff --git a/holmes/plugins/toolsets/datetime.py b/holmes/plugins/toolsets/datetime.py index 48c4a39c..cdc65874 100644 --- a/holmes/plugins/toolsets/datetime.py +++ b/holmes/plugins/toolsets/datetime.py @@ -26,6 +26,7 @@ def __init__(self): name="datetime", enabled=True, description="Current date and time information", + docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/datetime.html", icon_url="https://platform.robusta.dev/demos/internet-access.svg", prerequisites=[], tools=[CurrentTime()], From 45c5617a31479944d9764c5ec8bfae80c9db99be Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Fri, 21 Feb 2025 11:24:12 +0100 Subject: [PATCH 09/14] feat: update icon for datetime toolset --- holmes/plugins/toolsets/datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holmes/plugins/toolsets/datetime.py b/holmes/plugins/toolsets/datetime.py index cdc65874..90981925 100644 --- a/holmes/plugins/toolsets/datetime.py +++ b/holmes/plugins/toolsets/datetime.py @@ -27,7 +27,7 @@ def __init__(self): enabled=True, description="Current date and time information", docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/datetime.html", - 
icon_url="https://platform.robusta.dev/demos/internet-access.svg", + icon_url="https://upload.wikimedia.org/wikipedia/commons/8/8b/OOjs_UI_icon_calendar-ltr.svg", prerequisites=[], tools=[CurrentTime()], tags=[ToolsetTag.CORE], From 494cbfd7d220154e19e16484ddfde4f5003b7100 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Fri, 21 Feb 2025 11:27:11 +0100 Subject: [PATCH 10/14] fix: remove unused var --- holmes/plugins/toolsets/prometheus.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/holmes/plugins/toolsets/prometheus.py b/holmes/plugins/toolsets/prometheus.py index 228e7d03..824242b4 100644 --- a/holmes/plugins/toolsets/prometheus.py +++ b/holmes/plugins/toolsets/prometheus.py @@ -23,8 +23,6 @@ from holmes.utils.cache import TTLCache -cache = None - class PrometheusConfig(BaseModel): prometheus_url: Union[str, None] From 31ec106cdd5ad4e8167c5a96505d55f0afdf690e Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Fri, 21 Feb 2025 11:30:11 +0100 Subject: [PATCH 11/14] fix: revert change to labels query for testing --- holmes/plugins/toolsets/prometheus.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/holmes/plugins/toolsets/prometheus.py b/holmes/plugins/toolsets/prometheus.py index 824242b4..fc3d3aee 100644 --- a/holmes/plugins/toolsets/prometheus.py +++ b/holmes/plugins/toolsets/prometheus.py @@ -89,12 +89,12 @@ def fetch_metrics_labels( return cached_result series_url = urljoin(prometheus_url, "/api/v1/series") - # params: dict = { - # "match[]": f'{{__name__=~".*{metric_name}.*"}}', - # } params: dict = { - "match[]": '{__name__!=""}', + "match[]": f'{{__name__=~".*{metric_name}.*"}}', } + # params: dict = { + # "match[]": '{__name__!=""}', + # } if metrics_labels_time_window_hrs is not None: params["end_time"] = int(time.time()) params["start_time"] = params["end_time"] - ( From 58be19c4b1fe5086495885a42c4c35051f9224c3 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Tue, 25 Feb 2025 07:38:37 +0100 Subject: [PATCH 
12/14] Add type to promql embed, document api for promql results --- FEATURES.md | 136 ++++++++++++++++++ docs/installation.md | 8 +- helm/holmes/templates/holmes.yaml | 3 + .../prompts/generic_ask_conversation.jinja2 | 2 +- .../test_case.yaml | 5 - .../kubectl_events.txt | 15 ++ .../kubectl_find_resource.txt | 5 + .../test_case.yaml | 5 + .../execute_prometheus_range_query.txt | 0 .../get_current_time.txt | 0 .../list_available_metrics.txt | 0 .../test_case.yaml | 5 + .../test_case.yaml | 5 - .../test_case.yaml | 5 - .../execute_prometheus_range_query.txt | 0 .../get_current_time.txt | 0 .../kubectl_find_resource.txt | 0 .../list_available_metrics.txt | 0 .../test_case.yaml | 5 + .../execute_prometheus_range_query.txt | 0 ..._prometheus_range_query_with_namespace.txt | 0 .../get_current_time.txt | 0 .../kubectl_find_resource.txt | 0 .../list_available_metrics.txt | 0 .../test_case.yaml | 5 + .../execute_prometheus_range_query.txt | 0 .../execute_prometheus_range_query_2.txt | 0 .../execute_prometheus_range_query_median.txt | 0 .../get_current_time.txt | 0 .../helm/Dockerfile | 0 .../helm/app.py | 0 .../helm/build.sh | 0 .../helm/manifest.yaml | 0 .../helm/requirements.txt | 0 .../kubectl_find_resource.txt | 0 .../list_available_metrics.txt | 0 .../test_case.yaml | 4 +- tests/llm/test_ask_holmes.py | 3 +- tool_call.json | 10 ++ 39 files changed, 201 insertions(+), 20 deletions(-) create mode 100644 FEATURES.md delete mode 100644 tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/test_case.yaml create mode 100644 tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/kubectl_events.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/kubectl_find_resource.txt create mode 100644 tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/test_case.yaml rename tests/llm/fixtures/test_ask_holmes/{29_basic_promql_graph_cluster_memory => 
30_basic_promql_graph_cluster_memory}/execute_prometheus_range_query.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{29_basic_promql_graph_cluster_memory => 30_basic_promql_graph_cluster_memory}/get_current_time.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{29_basic_promql_graph_cluster_memory => 30_basic_promql_graph_cluster_memory}/list_available_metrics.txt (100%) create mode 100644 tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/test_case.yaml delete mode 100644 tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml delete mode 100644 tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/test_case.yaml rename tests/llm/fixtures/test_ask_holmes/{30_basic_promql_graph_pod_memory => 31_basic_promql_graph_pod_memory}/execute_prometheus_range_query.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{30_basic_promql_graph_pod_memory => 31_basic_promql_graph_pod_memory}/get_current_time.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{30_basic_promql_graph_pod_memory => 31_basic_promql_graph_pod_memory}/kubectl_find_resource.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{30_basic_promql_graph_pod_memory => 31_basic_promql_graph_pod_memory}/list_available_metrics.txt (100%) create mode 100644 tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/test_case.yaml rename tests/llm/fixtures/test_ask_holmes/{31_basic_promql_graph_pod_cpu => 32_basic_promql_graph_pod_cpu}/execute_prometheus_range_query.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{31_basic_promql_graph_pod_cpu => 32_basic_promql_graph_pod_cpu}/execute_prometheus_range_query_with_namespace.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{31_basic_promql_graph_pod_cpu => 32_basic_promql_graph_pod_cpu}/get_current_time.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{31_basic_promql_graph_pod_cpu => 32_basic_promql_graph_pod_cpu}/kubectl_find_resource.txt (100%) rename 
tests/llm/fixtures/test_ask_holmes/{31_basic_promql_graph_pod_cpu => 32_basic_promql_graph_pod_cpu}/list_available_metrics.txt (100%) create mode 100644 tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/test_case.yaml rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/execute_prometheus_range_query.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/execute_prometheus_range_query_2.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/execute_prometheus_range_query_median.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/get_current_time.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/helm/Dockerfile (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/helm/app.py (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/helm/build.sh (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/helm/manifest.yaml (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/helm/requirements.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/kubectl_find_resource.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/list_available_metrics.txt (100%) rename tests/llm/fixtures/test_ask_holmes/{32_http_latency_graph => 33_http_latency_graph}/test_case.yaml (56%) create mode 100644 tool_call.json diff --git a/FEATURES.md b/FEATURES.md new file mode 100644 index 00000000..0830112f --- /dev/null +++ b/FEATURES.md @@ -0,0 +1,136 @@ + +# Features + +This page documents and describes HolmesGPT's behaviour when it comes to its features. 
+ + +## Root Cause Analysis + +Also called Investigation, Root Cause Analysis (RCA) is HolmesGPT's ability to investigate alerts, +typically from Prometheus' alert manager. + +### Sectioned output + +HolmesGPT generates structured output by default. It is also capable of generating sections based on request. + +Here is an example of a request payload to run an investigation: + +```json +{ + "source": "prometheus", + "source_instance_id": "some-instance", + "title": "Pod is crash looping.", + "description": "Pod default/oomkill-deployment-696dbdbf67-d47z6 (main2) is in waiting state (reason: 'CrashLoopBackOff').", + "subject": { + "name": "oomkill-deployment-696dbdbf67-d47z6", + "subject_type": "deployment", + "namespace": "default", + "node": "some-node", + "container": "main2", + "labels": { + "x": "y", + "p": "q" + }, + "annotations": {} + }, + "context": + { + "robusta_issue_id": "5b3e2fb1-cb83-45ea-82ec-318c94718e44" + }, + "include_tool_calls": true, + "include_tool_call_results": true, + "sections": { + "Alert Explanation": "1-2 sentences explaining the alert itself - note don't say \"The alert indicates a warning event related to a Kubernetes pod doing blah\" rather just say \"The pod XYZ did blah\" because that is what the user actually cares about", + "Conclusions and Possible Root causes": "What conclusions can you reach based on the data you found? what are possible root causes (if you have enough conviction to say) or what uncertainty remains. Don't say root cause but 'possible root causes'. Be clear to distinguish between what you know for certain and what is a possible explanation", + "Related logs": "Truncate and share the most relevant logs, especially if these explain the root cause. For example: \nLogs from pod robusta-holmes:\n```\n```\n. Always embed the surrounding +/- 5 log lines to any relevant logs. " + } +} +``` + +Notice that the "sections" field contains 3 different sections. 
The text value for each section should be a prompt telling the LLM what the section should contain. +You can then expect the following in return: + +``` +{ + "analysis": , + "sections": { + "Alert Explanation": , + "Conclusions and Possible Root causes": , + "Related logs": + }, + "tool_calls": , + "instructions": +} +``` + +In some cases, the LLM may decide to set a section to `null` or even add or ignore some sections. + + +## PromQL + +If the `prometheus/metrics` toolset is enabled, HolmesGPT can embed graphs in conversations (ask holmes). + +For example, here is a scenario in which the LLM answers with a graph: + + +User question: + +``` +Show me the http request latency over time for the service customer-orders-service? +``` + + +HolmesGPT text response: +``` +Here's the average HTTP request latency over time for the `customer-orders-service`: + +<< {type: "promql", tool_name: "execute_prometheus_range_query", random_key: "9kLK"} >> +``` + +In addition to this text response, the returned JSON will contain one or more tool calls, including the prometheus query: + +``` +"tool_calls": [ + { + "tool_call_id": "call_lKI7CQW6Y2n1ZQ5dlxX79TcM", + "tool_name": "execute_prometheus_range_query", + "description": "Prometheus query_range. 
query=rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m]), start=1739705559, end=1739791959, step=300, description=HTTP request latency for customer-orders-service", + "result": "{\n \"status\": \"success\",\n \"random_key\": \"9kLK\",\n \"tool_name\": \"execute_prometheus_range_query\",\n \"description\": \"Average HTTP request latency for customer-orders-service\",\n \"query\": \"rate(http_request_duration_seconds_sum{service=\\\"customer-orders-service\\\"}[5m]) / rate(http_request_duration_seconds_count{service=\\\"customer-orders-service\\\"}[5m])\",\n \"start\": \"1739705559\",\n \"end\": \"1739791959\",\n \"step\": 60\n}" + } +], +``` + +The result of this tool call contains details about the [prometheus query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) to build the graph returned by HolmesGPT: + +```json +{ + "status": "success", + "random_key": "9kLK", + "tool_name": "execute_prometheus_range_query", + "description": "Average HTTP request latency for customer-orders-service", + "query": "rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])", + "start": "1739705559", // Can be rfc3339 or a unix timestamp + "end": "1739791959", // Can be rfc3339 or a unix timestamp + "step": 60 // Query resolution step width in seconds +} +``` + +In addition to `execute_prometheus_range_query`, HolmesGPT can generate similar results with an `execute_prometheus_instant_query` which is an [instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries): + +``` +Here's the average HTTP request latency over time for the `customer-orders-service`: + +<< {type: "promql", tool_name: "execute_prometheus_instant_query", random_key: "2KiL"} >> +``` + +```json +{ + "status": "success", + "random_key": 
"2KiL", + "tool_name": "execute_prometheus_instant_query", + "description": "Average HTTP request latency for customer-orders-service", + "query": "rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])" +} +``` + +Unlike the range query, the instant query result lacks the `start`, `end` and `step` arguments. diff --git a/docs/installation.md b/docs/installation.md index c3e4fa3b..504344f5 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -38,10 +38,16 @@ brew install holmesgpt holmes --help ``` +4. Apply an example Pod to Kubernetes with an error that Holmes can investigate: + +```sh +kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_node_selector.yaml +``` + 4. Run holmesgpt: ```sh -holmes ask "what issues do I have in my cluster" +holmes ask "what is wrong with the user-profile-import pod?" ``` diff --git a/helm/holmes/templates/holmes.yaml b/helm/holmes/templates/holmes.yaml index 0582a058..35478c39 100644 --- a/helm/holmes/templates/holmes.yaml +++ b/helm/holmes/templates/holmes.yaml @@ -14,6 +14,9 @@ spec: metadata: labels: app: holmes + annotations: + # checksum annotation triggering pod reload when .Values.toolsets changes by helm upgrade + checksum/toolset-config: {{ toYaml .Values.toolsets | sha256sum }} spec: {{- if .Values.customServiceAccountName }} serviceAccountName: {{ .Values.customServiceAccountName }} diff --git a/holmes/plugins/prompts/generic_ask_conversation.jinja2 b/holmes/plugins/prompts/generic_ask_conversation.jinja2 index b411dd2a..ef97a600 100644 --- a/holmes/plugins/prompts/generic_ask_conversation.jinja2 +++ b/holmes/plugins/prompts/generic_ask_conversation.jinja2 @@ -11,7 +11,7 @@ Use conversation history to maintain continuity when appropriate, ensuring effic Prometheus/PromQL queries * Use prometheus to execute promql queries with the tools 
`execute_prometheus_instant_query` and `execute_prometheus_range_query` * ALWAYS embed the execution results into your answer -* You only need to embed the partial result in your response. Include the `tool_name` and `random_key`. For example: << {tool_name: "execute_prometheus_query", random_key: "92jf2hf"} >> +* You only need to embed the partial result in your response. Include the `tool_name` and `random_key`. For example: << {type: "promql", tool_name: "execute_prometheus_query", random_key: "92jf2hf"} >> * Use these tools to generate charts that users can see. Here are standard metrics but you can use different ones: ** For memory consumption: `container_memory_working_set_bytes` ** For CPU usage: `container_cpu_usage_seconds_total` diff --git a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/test_case.yaml deleted file mode 100644 index 89a1fb3e..00000000 --- a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/test_case.yaml +++ /dev/null @@ -1,5 +0,0 @@ -user_prompt: "Show me a graph of the memory usage for the kubernetes cluster over the last 30 minutes" -expected_output: - - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "AtEJ" } >>. Both tool_name and random_key MUST absolutely be present and the same as mentioned here.' 
-evaluation: - correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/kubectl_events.txt b/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/kubectl_events.txt new file mode 100644 index 00000000..d74434a9 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/kubectl_events.txt @@ -0,0 +1,15 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_events","match_params":{"resource_type":"pod","pod_name":"alertmanager-robusta-kube-prometheus-st-alertmanager-0","namespace":"monitoring"}} +stdout: +LAST SEEN TYPE REASON OBJECT MESSAGE +3m52s Normal Scheduled Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Successfully assigned monitoring/alertmanager-robusta-kube-prometheus-st-alertmanager-0 to nicolas-local-rbac-azure-control-plane +3m52s Normal Pulled Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Container image "quay.io/prometheus-operator/prometheus-config-reloader:v0.70.0" already present on machine +3m52s Normal Created Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Created container init-config-reloader +3m52s Normal Started Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Started container init-config-reloader +3m51s Normal Pulled Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Container image "quay.io/prometheus/alertmanager:v0.26.0" already present on machine +3m51s Normal Created Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Created container alertmanager +3m51s Normal Started Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Started container alertmanager +3m51s Normal Pulled Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Container image "quay.io/prometheus-operator/prometheus-config-reloader:v0.70.0" already present on machine +3m51s Normal Created Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Created container config-reloader +3m50s Normal Started 
Pod/alertmanager-robusta-kube-prometheus-st-alertmanager-0 Started container config-reloader + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/kubectl_find_resource.txt new file mode 100644 index 00000000..cc1da7d5 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/kubectl_find_resource.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"alertmanager"}} +stdout: +monitoring alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 3m54s 10.244.0.30 nicolas-local-rbac-azure-control-plane alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-7fd84555d5,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 + +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/test_case.yaml new file mode 100644 index 00000000..9116ef3f --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/29_events_from_alert_manager/test_case.yaml @@ -0,0 +1,5 @@ +user_prompt: "Fetch all kubernetes events related to the alertmanager pod" +expected_output: + - The answer should list events with at least a mention of the container `alertmanager` being created. 
+evaluation: + correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt rename to tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/execute_prometheus_range_query.txt diff --git a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/get_current_time.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/get_current_time.txt rename to tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/get_current_time.txt diff --git a/tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/list_available_metrics.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/29_basic_promql_graph_cluster_memory/list_available_metrics.txt rename to tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/list_available_metrics.txt diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/test_case.yaml new file mode 100644 index 00000000..45d1f8b9 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_cluster_memory/test_case.yaml @@ -0,0 +1,5 @@ +user_prompt: "Show me a graph of the memory usage for the kubernetes cluster over the last 30 minutes" +expected_output: + - 'The answer should include something similar to << { type: "promql", tool_name: 
"execute_prometheus_range_query", random_key: "AtEJ" } >>. Both tool_name and random_key MUST absolutely be present and the same as mentioned here.' +evaluation: + correctness: 2 diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml deleted file mode 100644 index e8e5fae8..00000000 --- a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/test_case.yaml +++ /dev/null @@ -1,5 +0,0 @@ -user_prompt: "Show me a graph of the memory usage for the pod robusta-holmes over the last 30 minutes" -expected_output: - - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "vwJA" } >>. Both tool_name and random_key MUST absolutely be present and the same as mentioned here.' -evaluation: - correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/test_case.yaml deleted file mode 100644 index b4e1e221..00000000 --- a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/test_case.yaml +++ /dev/null @@ -1,5 +0,0 @@ -user_prompt: "Show me a graph of the CPU usage for the pod robusta-holmes over the last 30 minutes" -expected_output: - - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "HBGf" } >>. Both tool_name and random_key MUST absolutely be present and either the same as mentioned here or Rs0H.' 
-evaluation: - correctness: 1 diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt rename to tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/execute_prometheus_range_query.txt diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/get_current_time.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/get_current_time.txt rename to tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/get_current_time.txt diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/kubectl_find_resource.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/kubectl_find_resource.txt rename to tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/kubectl_find_resource.txt diff --git a/tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/list_available_metrics.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/30_basic_promql_graph_pod_memory/list_available_metrics.txt rename to tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/list_available_metrics.txt diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/test_case.yaml new file mode 100644 index 
00000000..aac2b923 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_memory/test_case.yaml @@ -0,0 +1,5 @@ +user_prompt: "Show me a graph of the memory usage for the pod robusta-holmes over the last 30 minutes" +expected_output: + - 'The answer should include something similar to << { type: "promql", tool_name: "execute_prometheus_range_query", random_key: "vwJA" } >>. Both tool_name and random_key MUST absolutely be present and the same as mentioned here.' +evaluation: + correctness: 2 diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt rename to tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/execute_prometheus_range_query.txt diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt b/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt rename to tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/execute_prometheus_range_query_with_namespace.txt diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/get_current_time.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/get_current_time.txt rename to tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/get_current_time.txt diff --git 
a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/kubectl_find_resource.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/kubectl_find_resource.txt rename to tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/kubectl_find_resource.txt diff --git a/tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/list_available_metrics.txt b/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/list_available_metrics.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/31_basic_promql_graph_pod_cpu/list_available_metrics.txt rename to tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/list_available_metrics.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/test_case.yaml new file mode 100644 index 00000000..a14dfec6 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/32_basic_promql_graph_pod_cpu/test_case.yaml @@ -0,0 +1,5 @@ +user_prompt: "Show me a graph of the CPU usage for the pod robusta-holmes over the last 30 minutes" +expected_output: + - 'The answer should include something similar to << { type: "promql", tool_name: "execute_prometheus_range_query", random_key: "HBGf" } >>. Both tool_name and random_key MUST absolutely be present and either the same as mentioned here or Rs0H.' 
+evaluation: + correctness: 2 diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query.txt b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/execute_prometheus_range_query.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query.txt rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/execute_prometheus_range_query.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_2.txt b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/execute_prometheus_range_query_2.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_2.txt rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/execute_prometheus_range_query_2.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_median.txt b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/execute_prometheus_range_query_median.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/execute_prometheus_range_query_median.txt rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/execute_prometheus_range_query_median.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/get_current_time.txt b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/get_current_time.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/get_current_time.txt rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/get_current_time.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/Dockerfile b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/Dockerfile similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/Dockerfile rename to 
tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/Dockerfile diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/app.py b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/app.py similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/app.py rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/app.py diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/build.sh b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/build.sh similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/build.sh rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/build.sh diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/manifest.yaml b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/manifest.yaml similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/manifest.yaml rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/manifest.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/requirements.txt b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/requirements.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/helm/requirements.txt rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/helm/requirements.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/kubectl_find_resource.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/kubectl_find_resource.txt rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/kubectl_find_resource.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/list_available_metrics.txt 
b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/list_available_metrics.txt similarity index 100% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/list_available_metrics.txt rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/list_available_metrics.txt diff --git a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/test_case.yaml similarity index 56% rename from tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/test_case.yaml rename to tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/test_case.yaml index 8e335408..66f41ed4 100644 --- a/tests/llm/fixtures/test_ask_holmes/32_http_latency_graph/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/33_http_latency_graph/test_case.yaml @@ -1,7 +1,7 @@ user_prompt: "Show me the http request latency over time for the service customer-orders-service?" expected_output: - - 'The answer should include something similar to << { tool_name: "execute_prometheus_range_query", random_key: "" } >>.' + - 'The answer should include something similar to << { type: "promql", tool_name: "execute_prometheus_range_query", random_key: "" } >>.' 
before_test: kubectl apply -f ./helm/manifest.yaml after_test: kubectl delete -f ./helm/manifest.yaml evaluation: - correctness: 1 + correctness: 2 diff --git a/tests/llm/test_ask_holmes.py b/tests/llm/test_ask_holmes.py index 2d12400e..bbdccba1 100644 --- a/tests/llm/test_ask_holmes.py +++ b/tests/llm/test_ask_holmes.py @@ -82,12 +82,13 @@ def test_ask_holmes(experiment_name, test_case): output = result.result expected = test_case.expected_output - scores = {} + print(result.model_dump_json()) if not isinstance(expected, list): expected = [expected] debug_expected = "\n- ".join(expected) + scores = {} print(f"** EXPECTED **\n- {debug_expected}") correctness_eval = evaluate_correctness(output=output, expected_elements=expected) print( diff --git a/tool_call.json b/tool_call.json new file mode 100644 index 00000000..42957c7a --- /dev/null +++ b/tool_call.json @@ -0,0 +1,10 @@ +{ + "status": "success", + "random_key": "9kLK", + "tool_name": "execute_prometheus_range_query", + "description": "Average HTTP request latency for customer-orders-service", + "query": "rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])", + "start": "1739705559", + "end": "1739791959", + "step": 60 +} From 8b7cad0e7a619b892cc777f83dc0c70828980a31 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Tue, 25 Feb 2025 07:42:18 +0100 Subject: [PATCH 13/14] remove unused code --- tests/llm/test_ask_holmes.py | 2 -- tool_call.json | 10 ---------- 2 files changed, 12 deletions(-) delete mode 100644 tool_call.json diff --git a/tests/llm/test_ask_holmes.py b/tests/llm/test_ask_holmes.py index bbdccba1..9e2eba22 100644 --- a/tests/llm/test_ask_holmes.py +++ b/tests/llm/test_ask_holmes.py @@ -82,8 +82,6 @@ def test_ask_holmes(experiment_name, test_case): output = result.result expected = test_case.expected_output - print(result.model_dump_json()) - if not isinstance(expected, list): 
expected = [expected] diff --git a/tool_call.json b/tool_call.json deleted file mode 100644 index 42957c7a..00000000 --- a/tool_call.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "status": "success", - "random_key": "9kLK", - "tool_name": "execute_prometheus_range_query", - "description": "Average HTTP request latency for customer-orders-service", - "query": "rate(http_request_duration_seconds_sum{service=\"customer-orders-service\"}[5m]) / rate(http_request_duration_seconds_count{service=\"customer-orders-service\"}[5m])", - "start": "1739705559", - "end": "1739791959", - "step": 60 -} From 7991c8e84799f45c658e6669d5125bf735ccc711 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Tue, 25 Feb 2025 09:04:32 +0100 Subject: [PATCH 14/14] doc: fix typo, missing code block closure --- FEATURES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FEATURES.md b/FEATURES.md index 0830112f..035e756d 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -85,11 +85,11 @@ HolmesGPT text response: Here's the average HTTP request latency over time for the `customer-orders-service`: << {type: "promql", tool_name: "execute_prometheus_range_query", random_key: "9kLK"} >> -`` +``` In addition to this text response, the returned JSON will contain one or more tool calls, including the prometheus query: -``` +```json "tool_calls": [ { "tool_call_id": "call_lKI7CQW6Y2n1ZQ5dlxX79TcM",