Skip to content

Commit

Permalink
Merge branch 'master' into configure_polarion_result_dir
Browse files Browse the repository at this point in the history
  • Loading branch information
elfiesmelfie authored Nov 26, 2024
2 parents 637d17b + 2f49338 commit f593108
Show file tree
Hide file tree
Showing 16 changed files with 102 additions and 110 deletions.
10 changes: 10 additions & 0 deletions .zuul.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@
- callback_plugins/.*
- README*
- .*/*.md
- roles/client_side_tests/.*
- roles/test_alerts/.*
- roles/test_collectd/.*
- roles/test_metrics_retention/.*
- roles/test_observability_strategy/.*
- roles/test_qdr/.*
- roles/test_sensubility/.*
- roles/test_snmp_traps/.*
- roles/test_verify_email/.*

- job:
name: functional-logging-tests-osp18
Expand Down Expand Up @@ -100,6 +109,7 @@
- feature-verification-tests-noop
- openstack-k8s-operators-content-provider:
override-checkout: main
irrelevant-files: *irrelevant_files
- functional-tests-on-osp18
- functional-logging-tests-osp18
- functional-graphing-tests-osp18
Expand Down
30 changes: 10 additions & 20 deletions roles/client_side_tests/tasks/test_e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
ansible.builtin.include_tasks:
file: get_prom_info.yml

- name: RHELOSP-37759
# Description: Query Prometheus for collectd_cpu_percent metrics and save the output into the file
- name: RHELOSP-37759 Query Prometheus for collectd_cpu_percent metrics and save the output into the file
ansible.builtin.shell:
cmd: >-
/usr/bin/curl -k {{ prom_auth_string }} \
Expand All @@ -16,8 +15,7 @@
failed_when:
- checkmyconf.rc !=0

- name: RHELOSP-57528
# Description: Query Prometheus for ceph_ceph_bytes metrics and save the output into the file
- name: RHELOSP-57528 Query Prometheus for ceph_ceph_bytes metrics and save the output into the file
ansible.builtin.shell:
cmd: >-
/usr/bin/curl -k {{ prom_auth_string }} \
Expand All @@ -29,8 +27,7 @@
failed_when:
- checkmyconf.rc !=0

- name: RHELOSP-57536
# Description: Query Prometheus for collectd_interface_if_packets_tx_total metrics and save the output into the file
- name: RHELOSP-57536 Query Prometheus for collectd_interface_if_packets_tx_total metrics and save the output into the file
ansible.builtin.shell:
cmd: >-
/usr/bin/curl -k {{ prom_auth_string }} \
Expand All @@ -42,8 +39,7 @@
failed_when:
- checkmyconf.rc !=0

- name: RHELOSP-37762
# Description: Query Prometheus for collectd_memory metrics and save the output into the file
- name: RHELOSP-37762 Query Prometheus for collectd_memory metrics and save the output into the file
ansible.builtin.shell:
cmd: >-
/usr/bin/curl -k {{ prom_auth_string }} \
Expand All @@ -55,8 +51,7 @@
failed_when:
- checkmyconf.rc !=0

- name: RHELOSP-117539
# Description: Query Prometheus for collectd_load_longterm metrics and save the output into the file
- name: RHELOSP-117539 Query Prometheus for collectd_load_longterm metrics and save the output into the file
ansible.builtin.shell:
cmd: >-
/usr/bin/curl -k {{ prom_auth_string }} \
Expand All @@ -68,8 +63,7 @@
failed_when:
- checkmyconf.rc !=0

- name: RHELOSP-37757
# Description: Read content of query_ceph_ceph_bytes file and check that metrics are present
- name: RHELOSP-37757 Read content of query_ceph_ceph_bytes file and check that metrics are present
ansible.builtin.command:
cmd: >-
egrep 'ceph' /tmp/query_ceph_ceph_bytes
Expand All @@ -78,8 +72,7 @@
failed_when:
- checkmyconf.rc != 0

- name: RHELOSP-37218
# Description: Read content of query_collectd_interface_tx_total file and check that metrics are present
- name: RHELOSP-37218 Read content of query_collectd_interface_tx_total file and check that metrics are present
ansible.builtin.shell:
cmd: >-
egrep 'controller-0|controller-1|controller-2|compute-0|compute-1|ceph-0' /tmp/query_collectd_interface_tx_total
Expand All @@ -88,8 +81,7 @@
failed_when:
- checkmyconf.rc != 0

- name: RHELOSP-37636
# Description: Read contents of query_collectd_cpu_percent file and check that metrics are present
- name: RHELOSP-37636 Read contents of query_collectd_cpu_percent file and check that metrics are present
ansible.builtin.shell:
cmd: >-
egrep 'controller-0|controller-1|controller-2|compute-0|compute-1|ceph-0' /tmp/query_collectd_cpu_percent
Expand All @@ -99,8 +91,7 @@
- checkmyconf.rc != 0


- name: RHELOSP-37670
# Description: Read content of query_collectd_memory file and check that metrics are present
- name: RHELOSP-37670 Read content of query_collectd_memory file and check that metrics are present
ansible.builtin.shell:
cmd: >-
egrep 'controller-0|controller-1|controller-2|compute-0|compute-1|ceph-0' /tmp/query_collectd_memory
Expand All @@ -110,8 +101,7 @@
- checkmyconf.rc != 0


- name: RHELOSP-37224
# Description: Read contents of query_load_longterm file and check that metrics are present
- name: RHELOSP-37224 Read contents of query_load_longterm file and check that metrics are present
ansible.builtin.shell:
cmd: >-
egrep 'controller-0|controller-1|controller-2|compute-0|compute-1|ceph-0' /tmp/query_load_longterm
Expand Down
2 changes: 1 addition & 1 deletion roles/telemetry_graphing/files/cypress.config.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const { defineConfig } = require('cypress')
module.exports = defineConfig({
e2e: {
baseUrl: 'https://console-openshift-console.apps-crc.testing',
baseUrl: 'https://console-openshift-console.apps-crc.testing/login',
specPattern: 'cypress/integration/**/*.{js,jsx,ts,tsx}',
supportFile: false,
},
Expand Down
7 changes: 3 additions & 4 deletions roles/telemetry_graphing/files/dashboard-openstack-cloud.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ describe('OpenShift Console Dashboard Test', () => {
}
});


});

it('should load and validate the OpenStack dashboards', () => {
Expand All @@ -36,11 +37,9 @@ describe('OpenShift Console Dashboard Test', () => {
dashboards.forEach(dashboard => {
cy.visit(`https://console-openshift-console.apps-crc.testing/monitoring/dashboards${dashboard.url}`);

cy.wait(5000);

// Wait for the dashboard to load and take a screenshot
cy.get('div[data-test-id="dashboard"]', { timeout: 10000 })
.find('[data-test-id="panel-"], [data-test-id="panel-cpu"], [data-test-id="panel-overview"]')
cy.get('div[data-test-id="dashboard"]', { timeout: 50000 })
.find('[data-test-id^="panel-"]')

cy.wait(5000);
cy.screenshot(dashboard.screenshot);
Expand Down
12 changes: 12 additions & 0 deletions roles/test_alerts/tasks/get_observability_api.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Work out which monitoring API group the alert tests should target.
#
# Reads `.spec.observabilityStrategy` from the `default` ServiceTelemetry (stf)
# object and publishes the fact `observability_api`:
#   - 'monitoring.rhobs'      when the strategy is 'use_redhat'
#   - 'monitoring.coreos.com' for any other value (including empty output)
- name: "Get the observability strategy"
  # No pipes, redirects or globbing are involved, so the command module is
  # used instead of shell; the single quotes around the jsonpath expression
  # are still honoured by the module's argument splitting.
  ansible.builtin.command:
    cmd: >-
      oc get stf default -ojsonpath='{.spec.observabilityStrategy}'
  # Read-only query: never report a change.
  changed_when: false
  register: observability_strategy

- name: "Set the observability api based on the observability strategy"
  ansible.builtin.set_fact:
    # Only the API group is stored here; callers append the version
    # themselves (e.g. "{{ observability_api }}/v1").
    observability_api: "{{ 'monitoring.rhobs' if observability_strategy.stdout == 'use_redhat' else 'monitoring.coreos.com' }}"

11 changes: 11 additions & 0 deletions roles/test_alerts/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
---
# Reuse the Prometheus connection lookup from the client_side_tests role,
# with the auth method set to `token`.
- name: "Set the prom auth"
  vars:
    prom_auth_method: token
  ansible.builtin.include_role:
    name: client_side_tests
    tasks_from: get_prom_info.yml

# Sets `observability_api` before the alert tests below are included.
- name: "Get the observability strategy and set the observability_api"
  ansible.builtin.include_tasks:
    file: get_observability_api.yml

- name: "Test Creating an alert rule in Prometheus"
ansible.builtin.include_tasks:
file: test_create_an_alert.yml
Expand Down
8 changes: 3 additions & 5 deletions roles/test_alerts/tasks/test_create_an_alert.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
# Assuming we're in the right project already...
# Following procedure on https://infrawatch.github.io/documentation/#creating-an-alert-rule-in-prometheus_assembly-advanced-features

- name: "RHELOSP-144965"
# description: Create the alert
- name: "RHELOSP-144965 Create the alert"
ansible.builtin.shell:
cmd: |
oc apply -f - <<EOF
apiVersion: monitoring.coreos.com/v1
apiVersion: {{ observability_api }}/v1
kind: PrometheusRule
metadata:
creationTimestamp: null
Expand All @@ -28,8 +27,7 @@
register: cmd_output
failed_when: cmd_output.rc != 0

- name: "RHELOSP-144480"
# description: Check that the alert was created
- name: "RHELOSP-144480 Check that the alert was created"
ansible.builtin.command:
cmd: |
curl -k {{ prom_auth_string }} https://{{ prom_url }}/api/v1/rules
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@

# Pre-check: is the value of global.timeout = 5m in the alertmanager secret
# TODO: put the patch into a file. and use --patch-file instead of -p OR slurp the file from files/
- name: "RHELOSP-144965"
# description: "Patch the ServiceTelemetry object for the STF deployment"
- name: "RHELOSP-144965 Patch the ServiceTelemetry object for the STF deployment"
ansible.builtin.shell:
cmd: |
oc patch stf default --type merge -p '{"spec": {"alertmanagerConfigManifest": "apiVersion: v1\nkind: Secret\nmetadata:\n name: 'alertmanager-default'\n namespace: 'service-telemetry'\ntype: Opaque\nstringData:\n alertmanager.yaml: |-\n global:\n resolve_timeout: 10m\n route:\n group_by: ['job']\n group_wait: 30s\n group_interval: 5m\n repeat_interval: 12h\n receiver: 'null'\n receivers:\n - name: 'null'\n"}}'
Expand Down Expand Up @@ -34,16 +33,14 @@
ansible.builtin.debug:
var: alertmanager_secret

- name: "RHELOSP-148697"
# description: Interrupt metrics flow by preventing the QDR from running
- name: "RHELOSP-148697 Interrupt metrics flow by preventing the QDR from running"
ansible.builtin.shell:
cmd: |
for i in {1..15}; do oc delete po -l application=default-interconnect; sleep 1; done
changed_when: false


- name: "RHELOSP-148698"
# description: Verify that the alert is active in Alertmanager
- name: "RHELOSP-148698 Verify that the alert is active in Alertmanager"
ansible.builtin.shell:
cmd: >-
oc exec -it prometheus-default-0 -c prometheus -- /bin/sh -c 'wget --header \
Expand All @@ -53,8 +50,7 @@
changed_when: false
failed_when: cmd_output.stdout|int == 0

- name: "RHELOSP-148699"
# description: "Verify that the alert is firing in Prometheus"
- name: "RHELOSP-148699 Verify that the alert is firing in Prometheus"
ansible.builtin.shell:
cmd: >-
/usr/bin/curl -k {{ prom_auth_string }} -g https://{{ prom_url }}/api/v1/alerts | grep 'firing' | grep 'Collectd metrics receive rate is zero' | wc -l
Expand All @@ -68,8 +64,7 @@
minutes: 2


- name: "RHELOSP-176039"
# Remove alertmanagerConfigManifest from the ServiceTelemetry object
- name: "RHELOSP-176039 Remove alertmanagerConfigManifest from the ServiceTelemetry object"
ansible.builtin.shell:
cmd: |
oc patch stf/default --type='json' -p '[{"op": "remove", "path": "/spec/alertmanagerConfigManifest"}]'
Expand Down
9 changes: 3 additions & 6 deletions roles/test_collectd/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
---
- name: RHELOSP-60406
# Description: Check whether collectd container is running
- name: RHELOSP-60406 Check whether collectd container is running
ansible.builtin.shell: |
set -o pipefail
{{ container_bin }} ps | grep {{ collectd_container_name }}
register: container_nodes
changed_when: false
failed_when: container_nodes.stdout_lines|length != 1

- name: RHELOSP-60411
# Description: Check for a non-zero number of metrics from collectd
- name: RHELOSP-60411 Check for a non-zero number of metrics from collectd
ansible.builtin.command: |
{{ container_bin }} exec {{ collectd_container_name }} collectdctl -s /var/run/collectd-socket listval
register: metrics
Expand All @@ -24,8 +22,7 @@
ansible.builtin.debug:
var: metrics.stdout_lines[-20]

- name: RHELOSP-69331
# Description: Get the value of some metric from collectd
- name: RHELOSP-69331 Get the value of some metric from collectd
ansible.builtin.command: |
{{ container_bin }} exec {{ collectd_container_name }} collectdctl -s /var/run/collectd-socket getval {{ metrics.stdout_lines[-20] }}
register: stat
Expand Down
6 changes: 2 additions & 4 deletions roles/test_metrics_retention/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
# Assuming we're in the right project already...


- name: "RHELOSP-144988"
# description: "Set metrics retention to 17d"
- name: "RHELOSP-144988 Set metrics retention to 17d"
ansible.builtin.shell:
cmd: |
oc patch stf/default --type merge -p '{"spec": {"backends": {"metrics": {"prometheus": {"enabled": true, "scrapeInterval": "10s", "storage": {"retention": "17d", "strategy": "ephemeral"}}}}}}'
Expand All @@ -20,8 +19,7 @@
changed_when: false


- name: "RHELOSP-144484"
# description: "Check that the retention was set"
- name: "RHELOSP-144484 Check that the retention was set"
ansible.builtin.shell:
cmd: |
oc describe pod prometheus-default-0 | grep -o 'storage.tsdb.retention.time=17d' | wc -l
Expand Down
Loading

0 comments on commit f593108

Please sign in to comment.