diff --git a/dockers/docker-fpm-frr/bgp_regex.json b/dockers/docker-fpm-frr/bgp_regex.json index 898b5b060ebe..f02ae53ed26f 100644 --- a/dockers/docker-fpm-frr/bgp_regex.json +++ b/dockers/docker-fpm-frr/bgp_regex.json @@ -3,6 +3,16 @@ "tag": "bgp-state", "regex": "Peer .default\\|([0-9a-f:.]*[0-9a-f]*). admin state is set to .(up|down).", "params": [ "ip", "status" ] + }, + { + "tag": "zebra-no-buff", + "regex": "No buffer space available", + "params": [] + }, + { + "tag": "notification", + "regex": "NOTIFICATION: (received|sent) (?:to|from) neighbor ([0-9a-f:.]*[0-9a-f+]*)\\s*.* (\\d*)\/(\\d*)", + "params": [ "is-sent", "ip", "major-code", "minor-code" ] } ] diff --git a/files/build_templates/dhcp_relay_regex.json b/files/build_templates/dhcp_relay_regex.json new file mode 100644 index 000000000000..c7aa81eaab18 --- /dev/null +++ b/files/build_templates/dhcp_relay_regex.json @@ -0,0 +1,7 @@ +[ + { + "tag": "dhcp-relay-discard", + "regex": "Discarding packet received on ([a-zA-Z0-9-_]*) interface that has no IPv4 address assigned.", + "params": [ "ifname" ] + } +] diff --git a/files/build_templates/dockerd_regex.json b/files/build_templates/dockerd_regex.json new file mode 100644 index 000000000000..09270766ac71 --- /dev/null +++ b/files/build_templates/dockerd_regex.json @@ -0,0 +1,7 @@ +[ + { + "tag": "invalid-freelist", + "regex": "invalid freelist", + "params": [] + } +] diff --git a/files/build_templates/events_info.json b/files/build_templates/events_info.json index d2e2eb151b48..b83afc3caf79 100644 --- a/files/build_templates/events_info.json +++ b/files/build_templates/events_info.json @@ -3,7 +3,7 @@ "proclist": [ { "name": "monit", - "parse_json": "monit_regex.json" + "parse_json": "monit_regex.json" }, { "name": "sshd", @@ -12,6 +12,30 @@ { "name": "systemd", "parse_json": "systemd_regex.json" + }, + { + "name": "dhcp_relay", + "parse_json": "dhcp_relay_regex.json" + }, + { + "name": "syncd", + "parse_json": "syncd_regex.json" + }, + { + "name": 
"kernel", + "parse_json": "kernel_regex.json" + }, + { + "name": "dockerd", + "parse_json": "dockerd_regex.json" + }, + { + "name": "arista", + "parse_json": "seu_regex.json" + }, + { + "name": "python3", + "parse_json": "seu_regex.json" } ] } diff --git a/files/build_templates/kernel_regex.json b/files/build_templates/kernel_regex.json new file mode 100644 index 000000000000..5afa166e0d63 --- /dev/null +++ b/files/build_templates/kernel_regex.json @@ -0,0 +1,7 @@ +[ + { + "tag": "event-kernel", + "regex": "(write failed|Write protected|Remounting filesystem read-only|zlib decompression failed, data probably corrupt)", + "params": [ "fail_type:ret=(arg==\"write failed\")and\"write_failed\"or((arg==\"Write protected\")and\"write_protected\"or((arg==\"Remounting filesystem read-only\")and\"remount_read_only\"or((arg==\"zlib decompression failed, data probably corrupt\")and\"zlib_decompress\"or\"\")))" ] + } +] diff --git a/files/build_templates/seu_regex.json b/files/build_templates/seu_regex.json new file mode 100644 index 000000000000..034de15dcb44 --- /dev/null +++ b/files/build_templates/seu_regex.json @@ -0,0 +1,7 @@ +[ + { + "tag": "event-seu", + "regex": "SEU error was detected", + "params": [] + } +] diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 511f54fd3997..1d9320fb5305 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -328,6 +328,11 @@ j2 -f json $BUILD_TEMPLATES/rsyslog_plugin.conf.j2 $BUILD_TEMPLATES/events_info. 
sudo cp $BUILD_TEMPLATES/monit_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/ sudo cp $BUILD_TEMPLATES/sshd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/ sudo cp $BUILD_TEMPLATES/systemd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/ +sudo cp $BUILD_TEMPLATES/dhcp_relay_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/ +sudo cp $BUILD_TEMPLATES/syncd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/ +sudo cp $BUILD_TEMPLATES/kernel_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/ +sudo cp $BUILD_TEMPLATES/dockerd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/ +sudo cp $BUILD_TEMPLATES/seu_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/ # Install custom-built monit package and SONiC configuration files sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/monit_*.deb || \ diff --git a/files/build_templates/syncd_regex.json b/files/build_templates/syncd_regex.json new file mode 100644 index 000000000000..cc78c30f539f --- /dev/null +++ b/files/build_templates/syncd_regex.json @@ -0,0 +1,7 @@ +[ + { + "tag": "syncd-failure", + "regex": "(MMU ERR Type|L3 route add failed with error|Assertion failed|Received switch event|SER Parity Check Error)", + "params": [ "fail_type:ret=(arg==\"Received switch event\")and\"switch_event\"or((arg==\"Assertion failed\")and\"assert\"or((arg==\"SER Parity Check Error\")and\"parity_check\"or((arg==\"MMU ERR Type\")and\"mmu_err\"or((arg==\"L3 route add failed with error\")and\"route_add_failed\"or\"\"))))" ] + } +] diff --git a/files/build_templates/systemd_regex.json b/files/build_templates/systemd_regex.json index d6ea5619eed9..bb1b220f534d 100644 --- a/files/build_templates/systemd_regex.json +++ b/files/build_templates/systemd_regex.json @@ -2,6 +2,11 @@ { "tag": "event-stopped-ctr", "regex": "Stopped ([a-zA-Z-_\\s]*) container", - "params": [ "ctr-name" ] + "params": [ "ctr_name" ] + }, + { + "tag": "watchdog-timeout", + "regex": "(?:watchdog|Watchdog) timeout .limit.([0-9])min.", + "params": [ "limit" ] } ] diff --git a/files/image_config/monit/container_checker 
b/files/image_config/monit/container_checker index 6d7f1403d7ae..8ca86c0653d2 100755 --- a/files/image_config/monit/container_checker +++ b/files/image_config/monit/container_checker @@ -158,7 +158,7 @@ def publish_events(lst): params = swsscommon.FieldValueMap() for ctr in lst: - params["name"] = ctr; + params["ctr_name"] = ctr; swsscommon.event_publish(events_handle, EVENTS_PUBLISHER_TAG, params) swsscommon.events_deinit_publisher(events_handle) diff --git a/src/dhcpmon/src/dhcp_mon.cpp b/src/dhcpmon/src/dhcp_mon.cpp index 74d9869741d1..dd850d00d280 100644 --- a/src/dhcpmon/src/dhcp_mon.cpp +++ b/src/dhcpmon/src/dhcp_mon.cpp @@ -15,6 +15,7 @@ #include "dhcp_mon.h" #include "dhcp_devman.h" +#include "events.h" /** DHCP device/interface state */ typedef struct @@ -40,6 +41,8 @@ static struct event *ev_sigterm; /** libevent SIGUSR1 signal event struct */ static struct event *ev_sigusr1; +event_handle_t g_events_handle; + /** DHCP monitor state data for aggregate device for mgmt device */ static dhcp_mon_state_t state_data[] = { [0] = { @@ -95,7 +98,15 @@ static void check_dhcp_relay_health(dhcp_mon_state_t *state_data) { case DHCP_MON_STATUS_UNHEALTHY: if (++state_data->count > dhcp_unhealthy_max_count) { - syslog(LOG_ALERT, state_data->msg, state_data->count * window_interval_sec, context->intf); + auto duration = state_data->count * window_interval_sec; + std::string vlan(context->intf); + syslog(LOG_ALERT, state_data->msg, duration, vlan.c_str()); + if (state_data->check_type == DHCP_MON_CHECK_POSITIVE) { + event_params_t params = { + { "vlan", vlan }, + { "duration", std::to_string(duration) }}; + event_publish(g_events_handle, "dhcp-relay-disparity", &params); + } dhcp_devman_print_status(context, DHCP_COUNTERS_SNAPSHOT); dhcp_devman_print_status(context, DHCP_COUNTERS_CURRENT); } @@ -179,6 +190,8 @@ int dhcp_mon_init(int window_sec, int max_count) break; } + g_events_handle = events_init_publisher("sonic-events-dhcp-relay"); + rv = 0; } while (0); @@ -203,6 +216,8 @@ void 
dhcp_mon_shutdown() event_free(ev_sigusr1); event_base_free(base); + + events_deinit_publisher(g_events_handle); } /**