diff --git a/lib/logmsg/logmsg.c b/lib/logmsg/logmsg.c index a4ffa2b0a1e..06ea2e88d6a 100644 --- a/lib/logmsg/logmsg.c +++ b/lib/logmsg/logmsg.c @@ -2011,14 +2011,13 @@ void log_msg_tags_init(void) { log_tags_register_predefined_tag("message.utf8_sanitized", LM_T_MSG_UTF8_SANITIZED); - log_tags_register_predefined_tag("message.parse_error", LM_T_MSG_PARSE_ERROR); + log_tags_register_predefined_tag("syslog.invalid_pri", LM_T_SYSLOG_INVALID_PRI); log_tags_register_predefined_tag("syslog.missing_pri", LM_T_SYSLOG_MISSING_PRI); log_tags_register_predefined_tag("syslog.missing_timestamp", LM_T_SYSLOG_MISSING_TIMESTAMP); log_tags_register_predefined_tag("syslog.invalid_hostname", LM_T_SYSLOG_INVALID_HOSTNAME); log_tags_register_predefined_tag("syslog.unexpected_framing", LM_T_SYSLOG_UNEXPECTED_FRAMING); log_tags_register_predefined_tag("syslog.rfc3164_missing_header", LM_T_SYSLOG_RFC3164_MISSING_HEADER); - log_tags_register_predefined_tag("syslog.rfc5424_unquoted_sdata_value", LM_T_SYSLOG_RFC5424_UNQUOTED_SDATA_VALUE); log_tags_register_predefined_tag("syslog.rfc5424_missing_hostname", LM_T_SYSLOG_RFC5424_MISSING_HOSTNAME); log_tags_register_predefined_tag("syslog.rfc5424_missing_app_name", LM_T_SYSLOG_RFC5424_MISSING_APP_NAME); diff --git a/lib/logmsg/logmsg.h b/lib/logmsg/logmsg.h index 81420cec5c1..2aa20aa2149 100644 --- a/lib/logmsg/logmsg.h +++ b/lib/logmsg/logmsg.h @@ -106,10 +106,10 @@ enum { /* means that the message is not valid utf8 */ LM_T_MSG_UTF8_SANITIZED, - /* msg-format parsing failed, "Error parsing ..." 
*/ - LM_T_MSG_PARSE_ERROR, /* missing value */ LM_T_SYSLOG_MISSING_PRI, + /* PRI is present but malformed: opening bracket not followed by a number and a closing bracket */ + LM_T_SYSLOG_INVALID_PRI, /* no timestamp present in the original message */ LM_T_SYSLOG_MISSING_TIMESTAMP, /* hostname field does not seem valid, check-hostname(yes) failed */ @@ -118,8 +118,6 @@ enum LM_T_SYSLOG_UNEXPECTED_FRAMING, /* no date & host information in the syslog message */ LM_T_SYSLOG_RFC3164_MISSING_HEADER, - /* incorrectly quoted RFC5424 SDATA */ - LM_T_SYSLOG_RFC5424_UNQUOTED_SDATA_VALUE, /* hostname field missing */ LM_T_SYSLOG_RFC5424_MISSING_HOSTNAME, /* program field missing */ diff --git a/lib/msg-format.c b/lib/msg-format.c index 5e48e3563cd..97d9c22b536 100644 --- a/lib/msg-format.c +++ b/lib/msg-format.c @@ -181,9 +181,10 @@ msg_format_parse_into(MsgFormatOptions *options, LogMessage *msg, if (!msg_format_try_parse_into(options, msg, data, length, &problem_position)) { - log_msg_set_tag_by_id(msg, LM_T_MSG_PARSE_ERROR); if (options->flags & LP_PIGGYBACK_ERRORS) msg_format_inject_parse_error(options, msg, data, _rstripped_message_length(data, length), problem_position); + else + log_msg_set_value(msg, LM_V_MESSAGE, (gchar *) data, length); /* the injected error message needs to be postprocessed too */ msg_format_postprocess_message(options, msg, data, length); diff --git a/modules/syslogformat/syslog-format.c b/modules/syslogformat/syslog-format.c index 01a6871033d..1f73f0ace8a 100644 --- a/modules/syslogformat/syslog-format.c +++ b/modules/syslogformat/syslog-format.c @@ -1005,6 +1005,9 @@ _syslog_format_parse_legacy_message(LogMessage *msg, * * Parse an RFC3164 formatted log message and store the parsed information * in @msg. Parsing is affected by the bits set @flags argument. + * + * This parser is _very_ forgiving, it basically accepts anything any device + * would barf on the line. 
**/ static gboolean _syslog_format_parse_legacy(const MsgFormatOptions *parse_options, @@ -1020,19 +1023,27 @@ _syslog_format_parse_legacy(const MsgFormatOptions *parse_options, _syslog_format_check_framing(msg, &src, &left); if (!_syslog_format_parse_pri(msg, &src, &left, parse_options->flags, parse_options->default_pri)) { - goto error; + /* invalid value, that's really difficult to do, as it needs to + * start with an opening bracket and then no number OR no closing bracket + * follows. A missing value would be accepted. + * + * This is a very rare case, but it's best handled like all the other + * formatting errors, accept it and shove the entire line into $MSG. + * This basically disables error piggybacking for RFC3164 inputs. */ + + log_msg_set_tag_by_id(msg, LM_T_SYSLOG_INVALID_PRI); + _syslog_format_parse_legacy_message(msg, &src, &left, parse_options); } + else + { + if ((parse_options->flags & LP_NO_HEADER) == 0) + _syslog_format_parse_legacy_header(msg, &src, &left, parse_options); - if ((parse_options->flags & LP_NO_HEADER) == 0) - _syslog_format_parse_legacy_header(msg, &src, &left, parse_options); - - _syslog_format_parse_legacy_message(msg, &src, &left, parse_options); + _syslog_format_parse_legacy_message(msg, &src, &left, parse_options); + } log_msg_set_value_to_string(msg, LM_V_MSGFORMAT, "rfc3164"); return TRUE; -error: - *position = src - data; - return FALSE; } /** diff --git a/modules/syslogformat/tests/test_syslog_format.c b/modules/syslogformat/tests/test_syslog_format.c index 887411b54ac..559a68e4080 100644 --- a/modules/syslogformat/tests/test_syslog_format.c +++ b/modules/syslogformat/tests/test_syslog_format.c @@ -86,6 +86,76 @@ Test(syslog_format, cisco_sequence_id_non_zero_termination) log_msg_unref(msg); } +Test(syslog_format, rfc3164_error_invalid_pri) +{ + /* malformed pri: the closing bracket after the number is missing */ + const gchar *data = "<189 Feb 3 12:34:56 host program[pid]: message"; + gsize data_length = strlen(data); + + LogMessage *msg = log_msg_new_empty(); + 
+ gsize problem_position; + cr_assert(syslog_format_handler(&parse_options, msg, (const guchar *) data, data_length, &problem_position)); + assert_log_message_value_by_name(msg, "MSG", "<189 Feb 3 12:34:56 host program[pid]: message"); + assert_log_message_has_tag(msg, "syslog.invalid_pri"); + + log_msg_unref(msg); +} + +Test(syslog_format, rfc3164_error_missing_timestamp) +{ + /* well-formed pri, but no timestamp follows it */ + const gchar *data = "<189> program[pid]: message"; + gsize data_length = strlen(data); + + LogMessage *msg = log_msg_new_empty(); + + gsize problem_position; + cr_assert(syslog_format_handler(&parse_options, msg, (const guchar *) data, data_length, &problem_position)); + /* without timestamp, host is not expected */ + assert_log_message_value_by_name(msg, "HOST", ""); + assert_log_message_value_by_name(msg, "PROGRAM", "program"); + assert_log_message_value_by_name(msg, "PID", "pid"); + assert_log_message_value_by_name(msg, "MSG", "message"); + assert_log_message_value_by_name(msg, "MSGFORMAT", "rfc3164"); + assert_log_message_has_tag(msg, "syslog.missing_timestamp"); + assert_log_message_has_tag(msg, "syslog.rfc3164_missing_header"); + + log_msg_unref(msg); +} + +Test(syslog_format, rfc5424_error_invalid_timestamp) +{ + const gchar *data = "<189>1 2024-09-16Q11:22:33+02:00 host program pid msgid [foo bar=baz] message"; + gsize data_length = strlen(data); + + parse_options.flags |= LP_SYSLOG_PROTOCOL; /* NOTE(review): modifies parse_options, which is not local to this test; presumably reset by the suite setup -- confirm */ + LogMessage *msg = log_msg_new_empty(); + + gsize problem_position; + cr_assert_not(syslog_format_handler(&parse_options, msg, (const guchar *) data, data_length, &problem_position)); + assert_log_message_value_by_name(msg, "MSGFORMAT", ""); + assert_log_message_has_tag(msg, "syslog.missing_timestamp"); + + log_msg_unref(msg); +} + +Test(syslog_format, rfc5424_error_invalid_sdata) +{ + const gchar *data = "<189>1 2024-09-16T11:22:33+02:00 host program pid msgid [foo bar=baz message"; + gsize data_length = strlen(data); + + parse_options.flags |= LP_SYSLOG_PROTOCOL; + 
LogMessage *msg = log_msg_new_empty(); + + gsize problem_position; + cr_assert_not(syslog_format_handler(&parse_options, msg, (const guchar *) data, data_length, &problem_position)); + assert_log_message_value_by_name(msg, "MSGFORMAT", ""); + assert_log_message_has_tag(msg, "syslog.rfc5424_invalid_sdata"); /* NOTE(review): registration of this tag is not part of the visible hunks -- confirm it is registered elsewhere */ + + log_msg_unref(msg); +} + Test(syslog_format, rfc3164_style_message_when_parsed_as_rfc5424_is_marked_as_such_in_msgformat) { const gchar *data = "<189>Feb 3 12:34:56 host program[pid]: message";