Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add flapping and percent_state_change (#1926) #1937

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions broker/unified_sql/src/stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1381,8 +1381,10 @@ void stream::_init_statements() {
"has_graph=?," // 7: perfdata != ""
"last_check_type=?," // 8: check_type
"last_check=?," // 9: last_check
"output=? " // 10: output
"WHERE id=? AND parent_id=0"); // 11: host_id
"output=?," // 10: output
"flapping=?," // 11: is_flapping
"percent_state_change=? " // 12: percent_state_change
"WHERE id=? AND parent_id=0"); // 13: host_id

const std::string sscr_resources_query(
"UPDATE resources SET "
Expand All @@ -1396,8 +1398,10 @@ void stream::_init_statements() {
"has_graph=?," // 7: perfdata != ""
"last_check_type=?," // 8: check_type
"last_check=?," // 9: last_check
"output=? " // 10: output
"WHERE id=? AND parent_id=?"); // 11, 12: service_id and host_id
"output=? ," // 10: output
"flapping=?," // 11: is_flapping
"percent_state_change=? " // 12: percent_state_change
"WHERE id=? AND parent_id=?"); // 13, 14: service_id and host_id
if (_store_in_hosts_services) {
if (_bulk_prepared_statement) {
auto hu = std::make_unique<database::mysql_bulk_stmt>(hscr_query);
Expand Down
48 changes: 34 additions & 14 deletions broker/unified_sql/src/stream_sql.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1928,8 +1928,9 @@ void stream::_process_pb_host(const std::shared_ptr<io::data>& d) {
"notes,"
"action_url,"
"notifications_enabled,passive_checks_enabled,"
"active_checks_enabled,enabled,icon_id) "
"VALUES(?,0,1,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?"
"active_checks_enabled,enabled,icon_id,"
"flapping,percent_state_change)"
"VALUES(?,0,1,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?,?"
")");
_resources_host_update = _mysql.prepare_query(
"UPDATE resources SET "
Expand All @@ -1940,7 +1941,8 @@ void stream::_process_pb_host(const std::shared_ptr<io::data>& d) {
"poller_id=?,severity_id=?,name=?,address=?,alias=?,"
"parent_name=?,notes_url=?,notes=?,action_url=?,"
"notifications_enabled=?,passive_checks_enabled=?,"
"active_checks_enabled=?,icon_id=?,enabled=1 WHERE "
"active_checks_enabled=?,icon_id=?,enabled=1, flapping=?,"
"percent_state_change=? WHERE "
"resource_id=?");
if (!_resources_tags_remove.prepared())
_resources_tags_remove = _mysql.prepare_query(
Expand Down Expand Up @@ -2056,6 +2058,8 @@ uint64_t stream::_process_pb_host_in_resources(const Host& h, int32_t conn) {
_resources_host_insert.bind_value_as_bool(19, h.passive_checks());
_resources_host_insert.bind_value_as_bool(20, h.active_checks());
_resources_host_insert.bind_value_as_u64(21, h.icon_id());
_resources_host_insert.bind_value_as_bool(22, h.flapping());
_resources_host_insert.bind_value_as_f64(23, h.percent_state_change());

std::promise<uint64_t> p;
std::future<uint64_t> future = p.get_future();
Expand Down Expand Up @@ -2151,7 +2155,9 @@ uint64_t stream::_process_pb_host_in_resources(const Host& h, int32_t conn) {
_resources_host_update.bind_value_as_bool(18, h.passive_checks());
_resources_host_update.bind_value_as_bool(19, h.active_checks());
_resources_host_update.bind_value_as_u64(20, h.icon_id());
_resources_host_update.bind_value_as_u64(21, res_id);
_resources_host_update.bind_value_as_bool(21, h.flapping());
_resources_host_update.bind_value_as_f64(22, h.percent_state_change());
_resources_host_update.bind_value_as_u64(23, res_id);

_mysql.run_statement(_resources_host_update,
database::mysql_error::store_host_resources, conn);
Expand Down Expand Up @@ -2531,7 +2537,9 @@ void stream::_process_pb_host_status(const std::shared_ptr<io::data>& d) {
else
b->set_value_as_u64(9, hscr.last_check());
b->set_value_as_str(10, hscr.output());
b->set_value_as_u64(11, hscr.host_id());
b->set_value_as_bool(11, hscr.flapping());
b->set_value_as_f64(12, hscr.percent_state_change());
b->set_value_as_u64(13, hscr.host_id());
b->next_row();
} else {
_hscr_resources_update->bind_value_as_i32(0, hscr.state());
Expand All @@ -2551,7 +2559,10 @@ void stream::_process_pb_host_status(const std::shared_ptr<io::data>& d) {
_hscr_resources_update->bind_value_as_u64_ext(
9, hscr.last_check(), mapping::entry::invalid_on_zero);
_hscr_resources_update->bind_value_as_str(10, hscr.output());
_hscr_resources_update->bind_value_as_u64(11, hscr.host_id());
_hscr_resources_update->bind_value_as_bool(11, hscr.flapping());
_hscr_resources_update->bind_value_as_f64(12,
hscr.percent_state_change());
_hscr_resources_update->bind_value_as_u64(13, hscr.host_id());

_mysql.run_statement(*_hscr_resources_update,
database::mysql_error::store_host_status, conn);
Expand Down Expand Up @@ -3743,8 +3754,8 @@ void stream::_process_pb_service(const std::shared_ptr<io::data>& d) {
"severity_id,name,parent_name,notes_url,notes,action_url,"
"notifications_enabled,passive_checks_enabled,active_"
"checks_"
"enabled,enabled,icon_id) "
"VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?)");
"enabled,enabled,icon_id, flapping, percent_state_change) "
"VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?,?)");
_resources_service_update = _mysql.prepare_query(
"UPDATE resources SET "
"type=?,internal_id=?,status=?,status_ordered=?,last_"
Expand All @@ -3756,7 +3767,7 @@ void stream::_process_pb_service(const std::shared_ptr<io::data>& d) {
","
"notes=?,action_url=?,notifications_enabled=?,"
"passive_checks_enabled=?,active_checks_enabled=?,icon_id=?"
","
", flapping=?, percent_state_change=?,"
"enabled=1 WHERE resource_id=?");
if (!_resources_disable.prepared()) {
_resources_disable = _mysql.prepare_query(
Expand Down Expand Up @@ -3866,6 +3877,8 @@ uint64_t stream::_process_pb_service_in_resources(const Service& s,
_resources_service_insert.bind_value_as_bool(20, s.passive_checks());
_resources_service_insert.bind_value_as_bool(21, s.active_checks());
_resources_service_insert.bind_value_as_u64(22, s.icon_id());
_resources_service_insert.bind_value_as_bool(23, s.flapping());
_resources_service_insert.bind_value_as_f64(24, s.percent_state_change());

std::promise<uint64_t> p;
std::future<uint64_t> future = p.get_future();
Expand Down Expand Up @@ -3962,7 +3975,9 @@ uint64_t stream::_process_pb_service_in_resources(const Service& s,
_resources_service_update.bind_value_as_bool(18, s.passive_checks());
_resources_service_update.bind_value_as_bool(19, s.active_checks());
_resources_service_update.bind_value_as_u64(20, s.icon_id());
_resources_service_update.bind_value_as_u64(21, res_id);
_resources_service_update.bind_value_as_bool(21, s.flapping());
_resources_service_update.bind_value_as_f64(22, s.percent_state_change());
_resources_service_update.bind_value_as_u64(23, res_id);

_mysql.run_statement(_resources_service_update,
database::mysql_error::store_service, conn);
Expand Down Expand Up @@ -4564,8 +4579,10 @@ void stream::_process_pb_service_status(const std::shared_ptr<io::data>& d) {
b->set_value_as_u64(9, sscr.last_check());
b->set_value_as_str(
10, fmt::string_view(sscr.output().c_str(), output_size));
b->set_value_as_u64(11, sscr.service_id());
b->set_value_as_u64(12, sscr.host_id());
b->set_value_as_bool(11, sscr.flapping());
b->set_value_as_f64(12, sscr.percent_state_change());
b->set_value_as_u64(13, sscr.service_id());
b->set_value_as_u64(14, sscr.host_id());
b->next_row();
SPDLOG_LOGGER_TRACE(
_logger_sql, "{} waiting updates for service status in resources",
Expand Down Expand Up @@ -4596,8 +4613,11 @@ void stream::_process_pb_service_status(const std::shared_ptr<io::data>& d) {
9, sscr.last_check(), mapping::entry::invalid_on_zero);
_sscr_resources_update->bind_value_as_str(
10, fmt::string_view(sscr.output().c_str(), output_size));
_sscr_resources_update->bind_value_as_u64(11, sscr.service_id());
_sscr_resources_update->bind_value_as_u64(12, sscr.host_id());
_sscr_resources_update->bind_value_as_bool(11, sscr.flapping());
_sscr_resources_update->bind_value_as_f64(12,
sscr.percent_state_change());
_sscr_resources_update->bind_value_as_u64(13, sscr.service_id());
_sscr_resources_update->bind_value_as_u64(14, sscr.host_id());

_mysql.run_statement(*_sscr_resources_update,
database::mysql_error::store_service_status, conn);
Expand Down
2 changes: 2 additions & 0 deletions resources/centreon_storage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,8 @@ CREATE TABLE `resources` (
`last_check` bigint(20) unsigned DEFAULT NULL COMMENT 'the last check timestamp',
`output` text DEFAULT NULL,
`enabled` tinyint(1) NOT NULL DEFAULT 1 COMMENT '0=false, 1=true',
`flapping` tinyint(1) DEFAULT NULL,
`percent_state_change` double DEFAULT NULL,
PRIMARY KEY (`resource_id`),
UNIQUE KEY `resources_id_parent_id_type_uindex` (`id`,`parent_id`,`type`),
KEY `resources_severities_severity_id_fk` (`severity_id`),
Expand Down
84 changes: 84 additions & 0 deletions tests/broker-engine/output-tables.robot
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,87 @@ BE_DEFAULT_NOTIFCATION_INTERVAL_IS_ZERO_SERVICE_RESOURCE
Should Be Equal As Strings ${output} ((0.0, 0.0),)
Ctn Stop engine
Ctn Kindly Stop Broker

BE_FLAPPING_SERVICE_RESOURCE
[Documentation] With BBDO 3, flapping detection must be set in services and resources tables.
[Tags] broker engine protobuf MON-154773
# Scenario: one engine, a central broker writing through unified_sql, BBDO 3.
Ctn Config Engine ${1}
Ctn Config Broker central
Ctn Config Broker module
Ctn Config Broker rrd
Ctn Config Broker Sql Output central unified_sql
Ctn Config BBDO3 1
# Flap detection is switched on globally and on service_1, with a low
# threshold of 10 and a high threshold of 20. service_1 is set passive;
# presumably so that only the results submitted below drive its state.
Ctn Engine Config Set Value 0 enable_flap_detection 1
Ctn Set Services Passive ${0} service_1
Ctn Engine Config Set Value In Services 0 service_1 flap_detection_enabled 1
Ctn Engine Config Set Value In Services 0 service_1 low_flap_threshold 10
Ctn Engine Config Set Value In Services 0 service_1 high_flap_threshold 20
Ctn Engine Config Set Value In Services 0 service_1 flap_detection_options all

# Empty the three tables read by the final check so that only rows written
# during this run can match.
Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort}
Execute SQL String DELETE FROM services
Execute SQL String DELETE FROM resources
Execute SQL String DELETE FROM hosts

Ctn Clear Retention

Ctn Start Broker
Ctn Start engine

# Let's wait for the external command check start
# NOTE(review): the visible signature of this keyword is
# "[Arguments] ${start} ${nbEngine}=1", so ${1} is bound to ${start} here,
# not to the number of engines - confirm this is intended.
Ctn Wait For Engine To Be Ready ${1}

# generate flapping
# Each of the 21 iterations submits state 2 through the "Hard" helper and
# then a single state 0 result, so the service keeps changing state.
FOR ${index} IN RANGE 21
Ctn Process Service Result Hard host_1 service_1 2 flapping
Ctn Process Service Check Result host_1 service_1 0 flapping
Sleep 1s
END

# Arguments after the service name map to ctn_check_service_flapping's
# timeout=30, precision=5 and expected=50: both tables must show flapping=1
# and a percent_state_change within 5 of 50 before 30 seconds have elapsed.
${result} Ctn Check Service Flapping host_1 service_1 30 5 50
Should Be True ${result} The service or resource (host_1,service_1) is not flapping as expected

[Teardown] Ctn Stop Engine Broker And Save Logs


BE_FLAPPING_HOST_RESOURCE
[Documentation] With BBDO 3, flapping detection must be set in hosts and resources tables.
[Tags] broker engine protobuf MON-154773
# Scenario: one engine, a central broker writing through unified_sql, BBDO 3.
Ctn Config Engine ${1}
Ctn Config Broker central
Ctn Config Broker module
Ctn Config Broker rrd
Ctn Config Broker Sql Output central unified_sql
Ctn Config BBDO3 1
# Flap detection is switched on globally and on host_1, with a low threshold
# of 10 and a high threshold of 20. host_1 is set passive; presumably so
# that only the results submitted below drive its state.
Ctn Engine Config Set Value 0 enable_flap_detection 1
Ctn Set Hosts Passive ${0} host_1
Ctn Engine Config Set Value In Hosts 0 host_1 flap_detection_enabled 1
Ctn Engine Config Set Value In Hosts 0 host_1 low_flap_threshold 10
Ctn Engine Config Set Value In Hosts 0 host_1 high_flap_threshold 20
Ctn Engine Config Set Value In Hosts 0 host_1 flap_detection_options all
# Raise the central broker's sql logger to trace level for this test.
Ctn Broker Config Log central sql trace

# Empty the tables so that only rows written during this run can match the
# final check.
Connect To Database pymysql ${DBName} ${DBUser} ${DBPass} ${DBHost} ${DBPort}
Execute SQL String DELETE FROM services
Execute SQL String DELETE FROM resources
Execute SQL String DELETE FROM hosts

Ctn Clear Retention

Ctn Start Broker
Ctn Start engine

# Let's wait for the external command check start
# NOTE(review): the visible signature of this keyword is
# "[Arguments] ${start} ${nbEngine}=1", so ${1} is bound to ${start} here,
# not to the number of engines - confirm this is intended.
Ctn Wait For Engine To Be Ready ${1}

# generate flapping
# Each of the 21 iterations submits state 2 three times (Ctn Process Host
# Result Hard) and then a single state 0 result, so the host keeps changing
# state.
FOR ${index} IN RANGE 21
Ctn Process Host Result Hard host_1 2 flapping
Ctn Process Host Check Result host_1 0 flapping
Sleep 1s
END

# Arguments after the host name map to ctn_check_host_flapping's timeout=30,
# precision=5 and expected=50: both tables must show flapping=1 and a
# percent_state_change within 5 of 50 before 30 seconds have elapsed.
${result} Ctn Check Host Flapping host_1 30 5 50
Should Be True ${result} The host or resource host_1 is not flapping as expected

[Teardown] Ctn Stop Engine Broker And Save Logs
70 changes: 70 additions & 0 deletions tests/resources/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2039,3 +2039,73 @@ def ctn_get_nb_process(exe:str):
if exe in p.name() or exe in ' '.join(p.cmdline()):
counter += 1
return counter

def ctn_check_service_flapping(host: str, serv: str, timeout: int, precision: float, expected: int):
    """
    Wait until a service is seen as flapping in both the services and resources tables.

    The storage database is polled about once per second. The check succeeds
    as soon as, in the services table and then in the resources table, exactly
    one row matches the service, its flapping column is 1 and its
    percent_state_change is closer than `precision` to `expected`.

    Args:
        host (str): The name of the host owning the service.
        serv (str): The description (name) of the service to check.
        timeout (int): The maximum time, in seconds, to wait for the expected values.
        precision (float): The maximum accepted distance between the stored
            percent_state_change and `expected` (strict comparison).
        expected (int): The expected percent_state_change value.

    Returns:
        bool: True if both tables matched before the timeout, False otherwise.
    """
    limit = time.time() + timeout

    # Values are bound by the driver (%s placeholders) instead of being
    # formatted into the SQL text, so a name containing a quote can neither
    # break nor alter the queries.
    s_query = """SELECT s.flapping, s.percent_state_change FROM services s JOIN hosts h on s.host_id = h.host_id WHERE h.name=%s AND description=%s"""
    r_query = """SELECT flapping, percent_state_change FROM resources WHERE parent_name=%s AND name=%s"""

    def _is_flapping(rows):
        # Exactly one row, flagged as flapping, with a usable state-change value.
        if len(rows) != 1:
            return False
        row = rows[0]
        change = row['percent_state_change']
        # The column is nullable: a NULL value can never be "near" expected
        # and must not reach the subtraction below.
        if row['flapping'] != 1 or change is None:
            return False
        return abs(change - expected) < precision

    # Pre-set so that the final log line also works when the loop body never
    # runs (timeout <= 0).
    result = None

    while time.time() < limit:
        # NOTE(review): a new connection is opened for every poll, presumably
        # so that each poll reads freshly committed rows - confirm before
        # hoisting it out of the loop.
        connection = pymysql.connect(host=DB_HOST,
                                     user=DB_USER,
                                     password=DB_PASS,
                                     database=DB_NAME_STORAGE,
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(s_query, (host, serv))
                result = cursor.fetchall()
                # resources is only queried once services already matches.
                if _is_flapping(result):
                    cursor.execute(r_query, (host, serv))
                    result = cursor.fetchall()
                    if _is_flapping(result):
                        return True
        time.sleep(1)
    # Shows the last rows fetched (from whichever table was queried last).
    logger.console(f"unexpected result: {result}")
    return False

def ctn_check_host_flapping(host: str, timeout: int, precision: float, expected: int):
    """
    Wait until a host is seen as flapping in both the hosts and resources tables.

    The storage database is polled about once per second. The check succeeds
    as soon as, in the hosts table and then in the resources table, exactly
    one row matches the host, its flapping column is 1 and its
    percent_state_change is closer than `precision` to `expected`.

    Args:
        host (str): The name of the host to check.
        timeout (int): The maximum time, in seconds, to wait for the expected values.
        precision (float): The maximum accepted distance between the stored
            percent_state_change and `expected` (strict comparison).
        expected (int): The expected percent_state_change value.

    Returns:
        bool: True if both tables matched before the timeout, False otherwise.
    """
    limit = time.time() + timeout

    # The host name is bound by the driver (%s placeholder) instead of being
    # formatted into the SQL text, so a name containing a quote can neither
    # break nor alter the queries.
    s_query = """SELECT flapping, percent_state_change FROM hosts WHERE name=%s"""
    r_query = """SELECT flapping, percent_state_change FROM resources WHERE name=%s AND parent_id=0"""

    def _is_flapping(rows):
        # Exactly one row, flagged as flapping, with a usable state-change value.
        if len(rows) != 1:
            return False
        row = rows[0]
        change = row['percent_state_change']
        # The column is nullable: a NULL value can never be "near" expected
        # and must not reach the subtraction below.
        if row['flapping'] != 1 or change is None:
            return False
        return abs(change - expected) < precision

    # Pre-set so that the final log line also works when the loop body never
    # runs (timeout <= 0).
    result = None

    while time.time() < limit:
        # NOTE(review): a new connection is opened for every poll, presumably
        # so that each poll reads freshly committed rows - confirm before
        # hoisting it out of the loop.
        connection = pymysql.connect(host=DB_HOST,
                                     user=DB_USER,
                                     password=DB_PASS,
                                     database=DB_NAME_STORAGE,
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(s_query, (host,))
                result = cursor.fetchall()
                # resources is only queried once hosts already matches.
                if _is_flapping(result):
                    cursor.execute(r_query, (host,))
                    result = cursor.fetchall()
                    if _is_flapping(result):
                        return True
        time.sleep(1)
    # Shows the last rows fetched (from whichever table was queried last).
    logger.console(f"unexpected result: {result}")
    return False

10 changes: 10 additions & 0 deletions tests/resources/resources.resource
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,16 @@ Ctn Process Service Result Hard
Sleep 1s
END

Ctn Process Host Result Hard
    [Documentation]    Submit the same check result (state and output) for the given host
    ...    three times in a row, pausing one second after each submission.
    [Arguments]    ${host}    ${state}    ${output}
    FOR    ${attempt}    IN RANGE    3
        Ctn Process Host Check Result    ${host}    ${state}    ${output}
        Sleep    1s
    END


Ctn Wait For Engine To Be Ready
[Arguments] ${start} ${nbEngine}=1
Expand Down
Loading