ovs_stats_la_LIBADD = $(BUILD_WITH_LIBYAJL_LIBS)
endif
+if BUILD_PLUGIN_PCIE_ERRORS
+# Loadable plugin module built from src/pcie_errors.c.
+pkglib_LTLIBRARIES += pcie_errors.la
+pcie_errors_la_SOURCES = src/pcie_errors.c
+pcie_errors_la_CPPFLAGS = $(AM_CPPFLAGS)
+pcie_errors_la_LDFLAGS = $(PLUGIN_LDFLAGS)
+
+# Unit test program, registered with both check_PROGRAMS and TESTS below.
+test_plugin_pcie_errors_SOURCES = \
+ src/pcie_errors_test.c \
+ src/daemon/utils_llist.c \
+ src/daemon/configfile.c \
+ src/daemon/types_list.c
+test_plugin_pcie_errors_CPPFLAGS = $(AM_CPPFLAGS)
+test_plugin_pcie_errors_LDFLAGS = $(PLUGIN_LDFLAGS)
+test_plugin_pcie_errors_LDADD = liboconfig.la libplugin_mock.la
+check_PROGRAMS += test_plugin_pcie_errors
+TESTS += test_plugin_pcie_errors
+endif
+
if BUILD_PLUGIN_PERL
pkglib_LTLIBRARIES += perl.la
perl_la_SOURCES = src/perl.c
OVS documentation.
<http://openvswitch.org/support/dist-docs/INSTALL.rst.html>
+ - pcie_errors
+ Read errors from PCI Express Device Status and AER extended capabilities.
+ <https://www.design-reuse.com/articles/38374/pcie-error-logging-and-handling-on-a-typical-soc.html>
+
- perl
The perl plugin implements a Perl-interpreter into collectd. You can
write your own plugins in Perl and return arbitrary values using this
AC_DEFINE([HAVE_CAPABILITY], [1], [Define to 1 if you have cap_get_proc() (-lcap).])
fi
+ # For pcie_errors plugin
+ AC_CHECK_HEADERS([linux/pci_regs.h],
+ [have_pci_regs_h="yes"],
+ [have_pci_regs_h="no (linux/pci_regs.h not found)"]
+ )
+
else
have_linux_raid_md_u_h="no"
have_linux_wireless_h="no"
plugin_numa="no"
plugin_ovs_events="no"
plugin_ovs_stats="no"
+plugin_pcie_errors="no"
plugin_perl="no"
plugin_pinba="no"
plugin_processes="no"
plugin_ovs_events="yes"
plugin_ovs_stats="yes"
fi
+
+ if test "x$have_pci_regs_h" = "xyes"; then
+ plugin_pcie_errors="yes"
+ fi
fi
if test "x$ac_system" = "xOpenBSD"; then
AC_PLUGIN([oracle], [$with_oracle], [Oracle plugin])
AC_PLUGIN([ovs_events], [$plugin_ovs_events], [OVS events plugin])
AC_PLUGIN([ovs_stats], [$plugin_ovs_stats], [OVS statistics plugin])
+AC_PLUGIN([pcie_errors], [$plugin_pcie_errors], [PCIe errors plugin])
AC_PLUGIN([perl], [$plugin_perl], [Embed a Perl interpreter])
AC_PLUGIN([pf], [$have_net_pfvar_h], [BSD packet filter (PF) statistics])
# FIXME: Check for libevent, too.
AC_MSG_RESULT([ oracle . . . . . . . $enable_oracle])
AC_MSG_RESULT([ ovs_events . . . . . $enable_ovs_events])
AC_MSG_RESULT([ ovs_stats . . . . . . $enable_ovs_stats])
+AC_MSG_RESULT([ pcie_errors . . . . . $enable_pcie_errors])
AC_MSG_RESULT([ perl . . . . . . . . $enable_perl])
AC_MSG_RESULT([ pf . . . . . . . . . $enable_pf])
AC_MSG_RESULT([ pinba . . . . . . . . $enable_pinba])
#@BUILD_PLUGIN_ORACLE_TRUE@LoadPlugin oracle
#@BUILD_PLUGIN_OVS_EVENTS_TRUE@LoadPlugin ovs_events
#@BUILD_PLUGIN_OVS_STATS_TRUE@LoadPlugin ovs_stats
+#@BUILD_PLUGIN_PCIE_ERRORS_TRUE@LoadPlugin pcie_errors
#@BUILD_PLUGIN_PERL_TRUE@LoadPlugin perl
#@BUILD_PLUGIN_PINBA_TRUE@LoadPlugin pinba
#@BUILD_PLUGIN_PING_TRUE@LoadPlugin ping
# Bridges "br0" "br_ext"
#</Plugin>
+#<Plugin pcie_errors>
+# Source "sysfs"
+# ReportMasked false
+# PersistentNotifications false
+#</Plugin>
+
#<Plugin perl>
# IncludeDir "/my/include/path"
# BaseName "Collectd::Plugins"
=back
+=head2 Plugin C<pcie_errors>
+
+The I<pcie_errors> plugin collects PCI Express errors from Device Status in Capability
+structure and from Advanced Error Reporting Extended Capability where available.
+At every read it polls config space of PCI Express devices and dispatches
+notification for every error that is set. It checks for new errors at every read.
+The device is indicated in plugin_instance according to format "domain:bus:dev.fn".
+Errors are divided into categories indicated by type_instance: "correctable", and
+for uncorrectable errors "non_fatal" or "fatal".
+Fatal errors are reported as I<NOTIF_FAILURE> and all others as I<NOTIF_WARNING>.
+
+B<Synopsis:>
+
+ <Plugin "pcie_errors">
+ Source "sysfs"
+ AccessDir "/sys/bus/pci"
+ ReportMasked false
+ PersistentNotifications false
+ </Plugin>
+
+B<Options:>
+
+=over 4
+
+=item B<Source> B<sysfs>|B<proc>
+
+Use B<sysfs> or B<proc> to read data from /sys or /proc.
+The default value is B<sysfs>.
+
+=item B<AccessDir> I<dir>
+
+Directory used to access device config space. It is optional and defaults to
+/sys/bus/pci for B<sysfs> and to /proc/bus/pci for B<proc>.
+
+=item B<ReportMasked> B<false>|B<true>
+
+If true plugin will notify errors that are set to masked in Error Mask register.
+Such errors are not reported to the PCI Express Root Complex. Defaults to
+B<false>.
+
+=item B<PersistentNotifications> B<false>|B<true>
+
+If false, the plugin will dispatch a notification only on set/clear of an error.
+The ones already reported will be ignored. Defaults to B<false>.
+
+=back
+
=head2 Plugin C<perl>
This plugin embeds a Perl-interpreter into collectd and provides an interface
--- /dev/null
+/**
+ * collectd - src/pcie_errors.c
+ *
+ * Copyright(c) 2018 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Kamil Wiatrowski <kamilx.wiatrowski@intel.com>
+ **/
+
+#include "collectd.h"
+
+#include "common.h"
+#include "utils_llist.h"
+
+#include <linux/pci_regs.h>
+
+#define PCIE_ERRORS_PLUGIN "pcie_errors"
+#define PCIE_DEFAULT_PROCDIR "/proc/bus/pci"
+#define PCIE_DEFAULT_SYSFSDIR "/sys/bus/pci"
+#define PCIE_NAME_LEN 512
+#define PCIE_BUFF_SIZE 1024
+
+#define PCIE_ERROR "pcie_error"
+#define PCIE_SEV_CE "correctable"
+#define PCIE_SEV_FATAL "fatal"
+#define PCIE_SEV_NOFATAL "non_fatal"
+
+#define PCIE_DEV(x) (((x) >> 3) & 0x1f)
+#define PCIE_FN(x) ((x)&0x07)
+
+#define PCIE_ECAP_OFFSET 0x100 /* ECAP always begin at offset 0x100 */
+
+typedef struct pcie_config_s {
+ _Bool use_sysfs; /* non-zero: use the sysfs accessors, zero: use proc */
+ _Bool notif_masked; /* also notify errors that are set in the mask register */
+ _Bool persistent; /* notify already reported errors again on every read */
+ char access_dir[PATH_MAX]; /* base directory; empty selects the default */
+ _Bool config_error; /* set by the config callback, makes init fail */
+} pcie_config_t;
+
+typedef struct pcie_device_s {
+ int fd; /* descriptor of the opened config space file, -1 after close */
+ int domain;
+ uint8_t bus;
+ uint8_t device;
+ uint8_t function;
+ int cap_exp; /* offset of PCI Express Capability Structure, -1 if unknown */
+ int ecap_aer; /* offset of AER extended capability, -1 if not found */
+ uint16_t device_status; /* Device Status error bits seen at last check */
+ uint32_t correctable_errors; /* AER correctable status seen at last check */
+ uint32_t uncorrectable_errors; /* AER uncorrectable status at last check */
+} pcie_device_t;
+
+typedef struct pcie_fops_s {
+ int (*list_devices)(llist_t *dev_list); /* fill dev_list, 0 on success */
+ int (*open)(pcie_device_t *dev); /* open config space, 0 on success */
+ void (*close)(pcie_device_t *dev);
+ int (*read)(pcie_device_t *dev, void *buff, int size, int pos); /* 0 if exactly size bytes were read */
+} pcie_fops_t;
+
+typedef struct pcie_error_s {
+ int mask; /* bit of the error in its status register */
+ const char *desc; /* text appended to the notification message */
+} pcie_error_t;
+
+static llist_t *pcie_dev_list; /* devices to poll; built in pcie_init() */
+static pcie_config_t pcie_config = {.access_dir = "", .use_sysfs = 1}; /* sysfs is the default source */
+static pcie_fops_t pcie_fops; /* accessors selected by pcie_access_config() */
+
+/* Device Error Status: error bits of the PCI Express Device Status register
+ * together with the text used in notifications. Walked by
+ * pcie_check_dev_status(). */
+static pcie_error_t pcie_base_errors[] = {
+ {PCI_EXP_DEVSTA_CED, "Correctable Error"},
+ {PCI_EXP_DEVSTA_NFED, "Non-Fatal Error"},
+ {PCI_EXP_DEVSTA_FED, "Fatal Error"},
+ {PCI_EXP_DEVSTA_URD, "Unsupported Request"}};
+static const int pcie_base_errors_num = STATIC_ARRAY_SIZE(pcie_base_errors);
+
+/* Uncorrectable Error Status: bits of the AER Uncorrectable Error Status
+ * register with their descriptions. Every entry is guarded by #ifdef, so it
+ * is compiled in only when the corresponding macro is defined. Walked by
+ * pcie_dispatch_uncorrectable_errors(). */
+static pcie_error_t pcie_aer_ues[] = {
+#ifdef PCI_ERR_UNC_DLP
+ {PCI_ERR_UNC_DLP, "Data Link Protocol"},
+#endif
+#ifdef PCI_ERR_UNC_SURPDN
+ {PCI_ERR_UNC_SURPDN, "Surprise Down"},
+#endif
+#ifdef PCI_ERR_UNC_POISON_TLP
+ {PCI_ERR_UNC_POISON_TLP, "Poisoned TLP"},
+#endif
+#ifdef PCI_ERR_UNC_FCP
+ {PCI_ERR_UNC_FCP, "Flow Control Protocol"},
+#endif
+#ifdef PCI_ERR_UNC_COMP_TIME
+ {PCI_ERR_UNC_COMP_TIME, "Completion Timeout"},
+#endif
+#ifdef PCI_ERR_UNC_COMP_ABORT
+ {PCI_ERR_UNC_COMP_ABORT, "Completer Abort"},
+#endif
+#ifdef PCI_ERR_UNC_UNX_COMP
+ {PCI_ERR_UNC_UNX_COMP, "Unexpected Completion"},
+#endif
+#ifdef PCI_ERR_UNC_RX_OVER
+ {PCI_ERR_UNC_RX_OVER, "Receiver Overflow"},
+#endif
+#ifdef PCI_ERR_UNC_MALF_TLP
+ {PCI_ERR_UNC_MALF_TLP, "Malformed TLP"},
+#endif
+#ifdef PCI_ERR_UNC_ECRC
+ {PCI_ERR_UNC_ECRC, "ECRC Error Status"},
+#endif
+#ifdef PCI_ERR_UNC_UNSUP
+ {PCI_ERR_UNC_UNSUP, "Unsupported Request"},
+#endif
+#ifdef PCI_ERR_UNC_ACSV
+ {PCI_ERR_UNC_ACSV, "ACS Violation"},
+#endif
+#ifdef PCI_ERR_UNC_INTN
+ {PCI_ERR_UNC_INTN, "Internal"},
+#endif
+#ifdef PCI_ERR_UNC_MCBTLP
+ {PCI_ERR_UNC_MCBTLP, "MC blocked TLP"},
+#endif
+#ifdef PCI_ERR_UNC_ATOMEG
+ {PCI_ERR_UNC_ATOMEG, "Atomic egress blocked"},
+#endif
+#ifdef PCI_ERR_UNC_TLPPRE
+ {PCI_ERR_UNC_TLPPRE, "TLP prefix blocked"},
+#endif
+};
+static const int pcie_aer_ues_num = STATIC_ARRAY_SIZE(pcie_aer_ues);
+
+/* Correctable Error Status: bits of the AER Correctable Error Status register
+ * with their descriptions. Every entry is guarded by #ifdef, so it is compiled
+ * in only when the corresponding macro is defined. Walked by
+ * pcie_dispatch_correctable_errors(). */
+static pcie_error_t pcie_aer_ces[] = {
+#ifdef PCI_ERR_COR_RCVR
+ {PCI_ERR_COR_RCVR, "Receiver Error Status"},
+#endif
+#ifdef PCI_ERR_COR_BAD_TLP
+ {PCI_ERR_COR_BAD_TLP, "Bad TLP Status"},
+#endif
+#ifdef PCI_ERR_COR_BAD_DLLP
+ {PCI_ERR_COR_BAD_DLLP, "Bad DLLP Status"},
+#endif
+#ifdef PCI_ERR_COR_REP_ROLL
+ {PCI_ERR_COR_REP_ROLL, "REPLAY_NUM Rollover"},
+#endif
+#ifdef PCI_ERR_COR_REP_TIMER
+ {PCI_ERR_COR_REP_TIMER, "Replay Timer Timeout"},
+#endif
+#ifdef PCI_ERR_COR_ADV_NFAT
+ {PCI_ERR_COR_ADV_NFAT, "Advisory Non-Fatal"},
+#endif
+#ifdef PCI_ERR_COR_INTERNAL
+ {PCI_ERR_COR_INTERNAL, "Corrected Internal"},
+#endif
+#ifdef PCI_ERR_COR_LOG_OVER
+ {PCI_ERR_COR_LOG_OVER, "Header Log Overflow"},
+#endif
+};
+static const int pcie_aer_ces_num = STATIC_ARRAY_SIZE(pcie_aer_ces);
+
+/* Allocate a device record for domain:bus:device.fn and append it to list.
+ * Both capability offsets start out as -1 (not discovered yet).
+ * Returns 0 on success or -ENOMEM when an allocation fails. */
+static int pcie_add_device(llist_t *list, int domain, uint8_t bus,
+                           uint8_t device, uint8_t fn) {
+  pcie_device_t *pdev = calloc(1, sizeof(*pdev));
+  if (pdev == NULL) {
+    ERROR(PCIE_ERRORS_PLUGIN ": Failed to allocate device");
+    return -ENOMEM;
+  }
+
+  pdev->cap_exp = -1;
+  pdev->ecap_aer = -1;
+  pdev->domain = domain;
+  pdev->bus = bus;
+  pdev->device = device;
+  pdev->function = fn;
+
+  llentry_t *node = llentry_create(NULL, pdev);
+  if (node == NULL) {
+    ERROR(PCIE_ERRORS_PLUGIN ": Failed to create llentry");
+    sfree(pdev);
+    return -ENOMEM;
+  }
+
+  llist_append(list, node);
+  DEBUG(PCIE_ERRORS_PLUGIN ": pci device added to list: %04x:%02x:%02x.%d",
+        domain, bus, device, fn);
+
+  return 0;
+}
+
+/* Free every device stored in list and then the list itself.
+ * A NULL list is accepted and ignored. */
+static void pcie_clear_list(llist_t *list) {
+  if (list != NULL) {
+    llentry_t *node = llist_head(list);
+
+    while (node != NULL) {
+      sfree(node->value);
+      node = node->next;
+    }
+
+    llist_destroy(list);
+  }
+}
+
+/* Build the device list from the "<access_dir>/devices" file (proc source).
+ * Only the first hexadecimal field of each line is parsed: bits 15..8 give
+ * the bus, bits 7..3 the device and bits 2..0 the function. Every device is
+ * added with domain 0.
+ * Lines that cannot be parsed are logged and skipped.
+ * Returns 0 on success, -EINVAL if dev_list is NULL, -ENOENT if the file
+ * cannot be opened, or the error returned by pcie_add_device(). */
+static int pcie_list_devices_proc(llist_t *dev_list) {
+  FILE *fd;
+  char file_name[PCIE_NAME_LEN];
+  char buf[PCIE_BUFF_SIZE];
+  unsigned int line = 0;
+  int ret = 0;
+
+  if (dev_list == NULL)
+    return -EINVAL;
+
+  snprintf(file_name, sizeof(file_name), "%s/devices", pcie_config.access_dir);
+  fd = fopen(file_name, "r");
+  if (!fd) {
+    char errbuf[PCIE_BUFF_SIZE];
+    ERROR(PCIE_ERRORS_PLUGIN ": Cannot open file %s to get devices list: %s",
+          file_name, sstrerror(errno, errbuf, sizeof(errbuf)));
+    return -ENOENT;
+  }
+
+  while (fgets(buf, sizeof(buf), fd)) {
+    unsigned int slot;
+    uint8_t bus, dev, fn;
+
+    /* Count every line that was read, not only the parsed ones, so the
+     * number reported below matches the position in the file. */
+    ++line;
+
+    if (sscanf(buf, "%x", &slot) != 1) {
+      ERROR(PCIE_ERRORS_PLUGIN ": Failed to read line %u from %s", line,
+            file_name);
+      continue;
+    }
+
+    bus = slot >> 8U;
+    dev = PCIE_DEV(slot);
+    fn = PCIE_FN(slot);
+    ret = pcie_add_device(dev_list, 0, bus, dev, fn);
+    if (ret)
+      break;
+  }
+
+  fclose(fd);
+  return ret;
+}
+
+/* Build the device list from the entries of "<access_dir>/devices" (sysfs
+ * source). Entry names are parsed as "domain:bus:device.function"; names
+ * starting with '.' are ignored and unparsable names are logged and skipped.
+ * Returns 0 on success, -EINVAL if dev_list is NULL, -ENOENT if the directory
+ * cannot be opened, or the error returned by pcie_add_device(). */
+static int pcie_list_devices_sysfs(llist_t *dev_list) {
+  if (dev_list == NULL)
+    return -EINVAL;
+
+  char dir_name[PCIE_NAME_LEN];
+  snprintf(dir_name, sizeof(dir_name), "%s/devices", pcie_config.access_dir);
+
+  DIR *dir = opendir(dir_name);
+  if (dir == NULL) {
+    char errbuf[PCIE_BUFF_SIZE];
+    ERROR(PCIE_ERRORS_PLUGIN ": Cannot open dir %s to get devices list: %s",
+          dir_name, sstrerror(errno, errbuf, sizeof(errbuf)));
+    return -ENOENT;
+  }
+
+  int ret = 0;
+  for (struct dirent *item = readdir(dir); item != NULL;
+       item = readdir(dir)) {
+    unsigned int dom, bus, dev;
+    int fn;
+
+    /* Omit special non-device entries */
+    if (item->d_name[0] == '.')
+      continue;
+
+    if (sscanf(item->d_name, "%x:%x:%x.%d", &dom, &bus, &dev, &fn) != 4) {
+      ERROR(PCIE_ERRORS_PLUGIN ": Failed to parse entry %s", item->d_name);
+      continue;
+    }
+
+    ret = pcie_add_device(dev_list, dom, bus, dev, fn);
+    if (ret != 0)
+      break;
+  }
+
+  closedir(dir);
+  return ret;
+}
+
+/* Close the device descriptor, log a failure, and mark it as closed (-1). */
+static void pcie_close(pcie_device_t *dev) {
+  const int status = close(dev->fd);
+
+  if (status == -1) {
+    char errbuf[PCIE_BUFF_SIZE];
+    ERROR(PCIE_ERRORS_PLUGIN ": Failed to close %04x:%02x:%02x.%d, fd=%d: %s",
+          dev->domain, dev->bus, dev->device, dev->function, dev->fd,
+          sstrerror(errno, errbuf, sizeof(errbuf)));
+  }
+
+  dev->fd = -1;
+}
+
+/* Open file `name` read-only and store the descriptor in dev->fd.
+ * Returns 0 on success, -ENOENT (after logging) when open() fails. */
+static int pcie_open(pcie_device_t *dev, const char *name) {
+  const int fd = open(name, O_RDONLY);
+
+  dev->fd = fd;
+  if (fd != -1)
+    return 0;
+
+  char errbuf[PCIE_BUFF_SIZE];
+  ERROR(PCIE_ERRORS_PLUGIN ": Failed to open file %s: %s", name,
+        sstrerror(errno, errbuf, sizeof(errbuf)));
+  return -ENOENT;
+}
+
+/* Open the config space file "<access_dir>/<bus>/<device>.<function>". */
+static int pcie_open_proc(pcie_device_t *dev) {
+  char path[PCIE_NAME_LEN];
+
+  snprintf(path, sizeof(path), "%s/%02x/%02x.%d", pcie_config.access_dir,
+           dev->bus, dev->device, dev->function);
+  return pcie_open(dev, path);
+}
+
+/* Open the config space file
+ * "<access_dir>/devices/<domain>:<bus>:<device>.<function>/config". */
+static int pcie_open_sysfs(pcie_device_t *dev) {
+  char path[PCIE_NAME_LEN];
+
+  snprintf(path, sizeof(path), "%s/devices/%04x:%02x:%02x.%d/config",
+           pcie_config.access_dir, dev->domain, dev->bus, dev->device,
+           dev->function);
+  return pcie_open(dev, path);
+}
+
+/* Read `size` bytes at offset `pos` of the opened device into buff.
+ * Returns 0 only when exactly `size` bytes were read; otherwise logs the
+ * failure (read error or short read) and returns -1. */
+static int pcie_read(pcie_device_t *dev, void *buff, int size, int pos) {
+  const int got = pread(dev->fd, buff, size, pos);
+
+  if (got == size)
+    return 0;
+
+  if (got != -1) {
+    ERROR(PCIE_ERRORS_PLUGIN
+          ": %04x:%02x:%02x.%d Read only %d bytes, should be %d",
+          dev->domain, dev->bus, dev->device, dev->function, got, size);
+    return -1;
+  }
+
+  char errbuf[PCIE_BUFF_SIZE];
+  ERROR(PCIE_ERRORS_PLUGIN ": Failed to read %04x:%02x:%02x.%d at pos %d: %s",
+        dev->domain, dev->bus, dev->device, dev->function, pos,
+        sstrerror(errno, errbuf, sizeof(errbuf)));
+  return -1;
+}
+
+/* Read one byte at `pos` through pcie_fops.read; yields 0 if the read fails. */
+static uint8_t pcie_read8(pcie_device_t *dev, int pos) {
+  uint8_t byte;
+  const int failed = pcie_fops.read(dev, &byte, 1, pos);
+
+  return failed ? 0 : byte;
+}
+
+/* Read two bytes at `pos` through pcie_fops.read; yields 0 if the read
+ * fails. */
+static uint16_t pcie_read16(pcie_device_t *dev, int pos) {
+  uint16_t word;
+  const int failed = pcie_fops.read(dev, &word, 2, pos);
+
+  return failed ? 0 : word;
+}
+
+/* Read four bytes at `pos` through pcie_fops.read; yields 0 if the read
+ * fails. */
+static uint32_t pcie_read32(pcie_device_t *dev, int pos) {
+  uint32_t dword;
+  const int failed = pcie_fops.read(dev, &dword, 4, pos);
+
+  return failed ? 0 : dword;
+}
+
+/* Fill in host, plugin_instance ("domain:bus:dev.fn"), type and
+ * type_instance of notification n, then dispatch it. All other fields must
+ * already be set by the caller. */
+static void pcie_dispatch_notification(pcie_device_t *dev, notification_t *n,
+                                       const char *type,
+                                       const char *type_instance) {
+  snprintf(n->plugin_instance, sizeof(n->plugin_instance), "%04x:%02x:%02x.%d",
+           dev->domain, dev->bus, dev->device, dev->function);
+  sstrncpy(n->type_instance, type_instance, sizeof(n->type_instance));
+  sstrncpy(n->type, type, sizeof(n->type));
+  sstrncpy(n->host, hostname_g, sizeof(n->host));
+
+  plugin_dispatch_notification(n);
+}
+
+/* Report errors found in AER Correctable Error Status register */
+static void pcie_dispatch_correctable_errors(pcie_device_t *dev,
+ uint32_t errors, uint32_t masked) {
+ for (int i = 0; i < pcie_aer_ces_num; i++) {
+ pcie_error_t *err = pcie_aer_ces + i;
+ notification_t n = {.severity = NOTIF_WARNING,
+ .time = cdtime(),
+ .plugin = PCIE_ERRORS_PLUGIN,
+ .meta = NULL};
+
+ /* If not specifically set by config option omit masked errors */
+ if (!pcie_config.notif_masked && (err->mask & masked))
+ continue;
+
+ if (err->mask & errors) {
+ /* Error already reported, notify only if persistent is set */
+ if (!pcie_config.persistent && (err->mask & dev->correctable_errors))
+ continue;
+
+ DEBUG(PCIE_ERRORS_PLUGIN ": %04x:%02x:%02x.%d: %s set", dev->domain,
+ dev->bus, dev->device, dev->function, err->desc);
+ snprintf(n.message, sizeof(n.message), "Correctable Error set: %s",
+ err->desc);
+ pcie_dispatch_notification(dev, &n, PCIE_ERROR, PCIE_SEV_CE);
+
+ } else if (err->mask & dev->correctable_errors) {
+ DEBUG(PCIE_ERRORS_PLUGIN ": %04x:%02x:%02x.%d: %s cleared", dev->domain,
+ dev->bus, dev->device, dev->function, err->desc);
+
+ n.severity = NOTIF_OKAY;
+ snprintf(n.message, sizeof(n.message), "Correctable Error cleared: %s",
+ err->desc);
+ pcie_dispatch_notification(dev, &n, PCIE_ERROR, PCIE_SEV_CE);
+ }
+ }
+}
+
+/* Report errors found in AER Uncorrectable Error Status register */
+static void pcie_dispatch_uncorrectable_errors(pcie_device_t *dev,
+ uint32_t errors, uint32_t masked,
+ uint32_t severity) {
+ for (int i = 0; i < pcie_aer_ues_num; i++) {
+ pcie_error_t *err = pcie_aer_ues + i;
+ const char *type_instance =
+ (severity & err->mask) ? PCIE_SEV_FATAL : PCIE_SEV_NOFATAL;
+ notification_t n = {
+ .time = cdtime(), .plugin = PCIE_ERRORS_PLUGIN, .meta = NULL};
+
+ /* If not specifically set by config option omit masked errors */
+ if (!pcie_config.notif_masked && (err->mask & masked))
+ continue;
+
+ if (err->mask & errors) {
+ /* Error already reported, notify only if persistent is set */
+ if (!pcie_config.persistent && (err->mask & dev->uncorrectable_errors))
+ continue;
+
+ DEBUG(PCIE_ERRORS_PLUGIN ": %04x:%02x:%02x.%d: %s(%s) set", dev->domain,
+ dev->bus, dev->device, dev->function, err->desc, type_instance);
+
+ n.severity = (severity & err->mask) ? NOTIF_FAILURE : NOTIF_WARNING;
+ snprintf(n.message, sizeof(n.message), "Uncorrectable(%s) Error set: %s",
+ type_instance, err->desc);
+ pcie_dispatch_notification(dev, &n, PCIE_ERROR, type_instance);
+
+ } else if (err->mask & dev->uncorrectable_errors) {
+ DEBUG(PCIE_ERRORS_PLUGIN ": %04x:%02x:%02x.%d: %s(%s) cleared",
+ dev->domain, dev->bus, dev->device, dev->function, err->desc,
+ type_instance);
+
+ n.severity = NOTIF_OKAY;
+ snprintf(n.message, sizeof(n.message),
+ "Uncorrectable(%s) Error cleared: %s", type_instance, err->desc);
+ pcie_dispatch_notification(dev, &n, PCIE_ERROR, type_instance);
+ }
+ }
+}
+
+/* Find offset of PCI Express Capability Structure
+ * in PCI configuration space.
+ * The capability list is followed from PCI_CAPABILITY_LIST; the walk stops
+ * at a zero pointer, at an ID of 0xff, or after a bounded number of entries
+ * so that a cyclic or corrupted list cannot hang the plugin.
+ * Returns offset, -1 if not found.
+**/
+static int pcie_find_cap_exp(pcie_device_t *dev) {
+  /* Entries are dword aligned (see the "& ~3" below) and live after the
+   * 64-byte standard header of the 256-byte configuration space, so a
+   * well-formed list has at most (256 - 64) / 4 = 48 entries. */
+  int ttl = 48;
+  int pos = pcie_read8(dev, PCI_CAPABILITY_LIST) & ~3;
+
+  while (pos != 0 && ttl-- > 0) {
+    uint8_t id = pcie_read8(dev, pos + PCI_CAP_LIST_ID);
+
+    if (id == 0xff)
+      break;
+    if (id == PCI_CAP_ID_EXP)
+      return pos;
+
+    pos = pcie_read8(dev, pos + PCI_CAP_LIST_NEXT) & ~3;
+  }
+
+  DEBUG(PCIE_ERRORS_PLUGIN ": Cannot find CAP EXP for %04x:%02x:%02x.%d",
+        dev->domain, dev->bus, dev->device, dev->function);
+
+  return -1;
+}
+
+/* Find offset of Advanced Error Reporting Capability.
+ * The extended capability list is followed starting at PCIE_ECAP_OFFSET. The
+ * walk stops at a zero next pointer, at a pointer that does not lie above
+ * PCIE_ECAP_OFFSET, or after a bounded number of entries so that a cyclic or
+ * corrupted list cannot hang the plugin.
+ * Returns AER offset, -1 if not found.
+**/
+static int pcie_find_ecap_aer(pcie_device_t *dev) {
+  /* Extended capabilities occupy offsets 0x100..0xfff and each one needs at
+   * least 8 bytes, so a well-formed list has at most (4096 - 256) / 8
+   * entries. */
+  int ttl = (4096 - 256) / 8;
+  int pos = PCIE_ECAP_OFFSET;
+  uint32_t header = pcie_read32(dev, pos);
+  int id = PCI_EXT_CAP_ID(header);
+  int next = PCI_EXT_CAP_NEXT(header);
+
+  /* An all-zero first header means there are no extended capabilities */
+  if (!id && !next)
+    return -1;
+
+  if (id == PCI_EXT_CAP_ID_ERR)
+    return pos;
+
+  while (next != 0 && ttl-- > 0) {
+    if (next <= PCIE_ECAP_OFFSET)
+      break;
+
+    header = pcie_read32(dev, next);
+    id = PCI_EXT_CAP_ID(header);
+
+    if (id == PCI_EXT_CAP_ID_ERR)
+      return next;
+
+    next = PCI_EXT_CAP_NEXT(header);
+  }
+
+  return -1;
+}
+
+static void pcie_check_dev_status(pcie_device_t *dev, int pos) {
+ /* Read Device Status register with mask for errors only */
+ uint16_t new_status = pcie_read16(dev, pos + PCI_EXP_DEVSTA) & 0xf;
+
+ /* Check if anything new should be reported */
+ if (!(pcie_config.persistent && new_status) &&
+ (new_status == dev->device_status))
+ return;
+
+ /* Report errors found in Device Status register */
+ for (int i = 0; i < pcie_base_errors_num; i++) {
+ pcie_error_t *err = pcie_base_errors + i;
+ const char *type_instance = (err->mask == PCI_EXP_DEVSTA_FED)
+ ? PCIE_SEV_FATAL
+ : (err->mask == PCI_EXP_DEVSTA_CED)
+ ? PCIE_SEV_CE
+ : PCIE_SEV_NOFATAL;
+ const int severity =
+ (err->mask == PCI_EXP_DEVSTA_FED) ? NOTIF_FAILURE : NOTIF_WARNING;
+ notification_t n = {.severity = severity,
+ .time = cdtime(),
+ .plugin = PCIE_ERRORS_PLUGIN,
+ .meta = NULL};
+
+ if (err->mask & new_status) {
+ /* Error already reported, notify only if persistent is set */
+ if (!pcie_config.persistent && (err->mask & dev->device_status))
+ continue;
+
+ DEBUG(PCIE_ERRORS_PLUGIN ": %04x:%02x:%02x.%d: %s set", dev->domain,
+ dev->bus, dev->device, dev->function, err->desc);
+ snprintf(n.message, sizeof(n.message), "Device Status Error set: %s",
+ err->desc);
+ pcie_dispatch_notification(dev, &n, PCIE_ERROR, type_instance);
+
+ } else if (err->mask & dev->device_status) {
+ DEBUG(PCIE_ERRORS_PLUGIN ": %04x:%02x:%02x.%d: %s cleared", dev->domain,
+ dev->bus, dev->device, dev->function, err->desc);
+ n.severity = NOTIF_OKAY;
+ snprintf(n.message, sizeof(n.message), "Device Status Error cleared: %s",
+ err->desc);
+ pcie_dispatch_notification(dev, &n, PCIE_ERROR, type_instance);
+ }
+ }
+
+ dev->device_status = new_status;
+}
+
+/* Poll the AER status registers at extended capability offset pos.
+ * The uncorrectable status is handled first, then the correctable one. The
+ * mask (and severity) registers are only read when something has to be
+ * dispatched: the status changed, or it is non-zero and persistent
+ * notifications are enabled. The values read are remembered in dev. */
+static void pcie_check_aer(pcie_device_t *dev, int pos) {
+  /* Uncorrectable errors */
+  const uint32_t unc = pcie_read32(dev, pos + PCI_ERR_UNCOR_STATUS);
+  const _Bool unc_repeat = pcie_config.persistent && unc;
+
+  if (unc_repeat || (unc != dev->uncorrectable_errors)) {
+    const uint32_t unc_mask = pcie_read32(dev, pos + PCI_ERR_UNCOR_MASK);
+    const uint32_t unc_sever = pcie_read32(dev, pos + PCI_ERR_UNCOR_SEVER);
+
+    pcie_dispatch_uncorrectable_errors(dev, unc, unc_mask, unc_sever);
+  }
+  dev->uncorrectable_errors = unc;
+
+  /* Correctable errors */
+  const uint32_t cor = pcie_read32(dev, pos + PCI_ERR_COR_STATUS);
+  const _Bool cor_repeat = pcie_config.persistent && cor;
+
+  if (cor_repeat || (cor != dev->correctable_errors)) {
+    const uint32_t cor_mask = pcie_read32(dev, pos + PCI_ERR_COR_MASK);
+
+    pcie_dispatch_correctable_errors(dev, cor, cor_mask);
+  }
+  dev->correctable_errors = cor;
+}
+
+/* Check Device Status (and AER, when available) of every device in devs.
+ * A device that cannot be opened triggers a NOTIF_FAILURE notification.
+ * Returns 0 when all devices were opened, -1 if devs is NULL or at least one
+ * device failed to open. */
+static int pcie_process_devices(llist_t *devs) {
+  if (devs == NULL)
+    return -1;
+
+  int result = 0;
+  for (llentry_t *e = llist_head(devs); e != NULL; e = e->next) {
+    pcie_device_t *dev = e->value;
+
+    if (pcie_fops.open(dev) != 0) {
+      notification_t n = {.severity = NOTIF_FAILURE,
+                          .time = cdtime(),
+                          .message = "Failed to read device status",
+                          .plugin = PCIE_ERRORS_PLUGIN,
+                          .meta = NULL};
+      pcie_dispatch_notification(dev, &n, "", "");
+      result = -1;
+      continue;
+    }
+
+    pcie_check_dev_status(dev, dev->cap_exp);
+    if (dev->ecap_aer != -1)
+      pcie_check_aer(dev, dev->ecap_aer);
+
+    pcie_fops.close(dev);
+  }
+
+  return result;
+}
+
+/* Called during init: look up the capability offsets of every listed device
+ * and drop the entries that cannot be opened or that are not PCI Express
+ * devices. Devices without AER stay in the list with ecap_aer == -1. */
+static void pcie_preprocess_devices(llist_t *devs) {
+  if (devs == NULL)
+    return;
+
+  llentry_t *e = llist_head(devs);
+  while (e != NULL) {
+    pcie_device_t *dev = e->value;
+    _Bool keep = 0;
+
+    if (pcie_fops.open(dev) != 0) {
+      ERROR(PCIE_ERRORS_PLUGIN ": %04x:%02x:%02x.%d: failed to open",
+            dev->domain, dev->bus, dev->device, dev->function);
+    } else {
+      uint16_t status = pcie_read16(dev, PCI_STATUS);
+      if (status & PCI_STATUS_CAP_LIST)
+        dev->cap_exp = pcie_find_cap_exp(dev);
+
+      /* Every PCIe device must have Capability Structure */
+      if (dev->cap_exp != -1) {
+        keep = 1;
+        dev->ecap_aer = pcie_find_ecap_aer(dev);
+        if (dev->ecap_aer == -1)
+          INFO(PCIE_ERRORS_PLUGIN
+               ": Device is not AER capable: %04x:%02x:%02x.%d",
+               dev->domain, dev->bus, dev->device, dev->function);
+      } else {
+        DEBUG(PCIE_ERRORS_PLUGIN ": Not PCI Express device: %04x:%02x:%02x.%d",
+              dev->domain, dev->bus, dev->device, dev->function);
+      }
+
+      pcie_fops.close(dev);
+    }
+
+    /* Remember the successor before the current entry may be destroyed */
+    llentry_t *following = e->next;
+    if (!keep) {
+      sfree(dev);
+      llist_remove(devs, e);
+      llentry_destroy(e);
+    }
+    e = following;
+  }
+}
+
+/* Read callback: poll all known devices. Returns 0 on success, -1 when
+ * pcie_process_devices() reports a failure. */
+static int pcie_plugin_read(__attribute__((unused)) user_data_t *ud) {
+  const int status = pcie_process_devices(pcie_dev_list);
+
+  if (status >= 0)
+    return 0;
+
+  ERROR(PCIE_ERRORS_PLUGIN ": Failed to read devices state");
+  return -1;
+}
+
+/* Select the accessor functions in pcie_fops according to the configured
+ * source (sysfs or proc) and, when no access directory was configured, fill
+ * in the default directory of that source. */
+static void pcie_access_config(void) {
+  const char *default_dir;
+
+  if (pcie_config.use_sysfs) {
+    default_dir = PCIE_DEFAULT_SYSFSDIR;
+    pcie_fops.list_devices = pcie_list_devices_sysfs;
+    pcie_fops.open = pcie_open_sysfs;
+  } else {
+    /* use proc */
+    default_dir = PCIE_DEFAULT_PROCDIR;
+    pcie_fops.list_devices = pcie_list_devices_proc;
+    pcie_fops.open = pcie_open_proc;
+  }
+
+  /* A directory set by the user always wins over the default */
+  if (pcie_config.access_dir[0] == '\0')
+    sstrncpy(pcie_config.access_dir, default_dir,
+             sizeof(pcie_config.access_dir));
+
+  /* Functions shared by both sources */
+  pcie_fops.close = pcie_close;
+  pcie_fops.read = pcie_read;
+}
+
+/* Config callback: parse the children of the <Plugin pcie_errors> block.
+ * Recognized keys (case-insensitive): Source ("sysfs" or "proc"), AccessDir,
+ * ReportMasked and PersistentNotifications.
+ * Always returns 0. An unknown key or an invalid value is logged, recorded
+ * in pcie_config.config_error (which makes pcie_init() fail) and stops the
+ * parsing of the remaining children. */
+static int pcie_plugin_config(oconfig_item_t *ci) {
+
+  for (int i = 0; i < ci->children_num; i++) {
+    oconfig_item_t *child = ci->children + i;
+    int status = 0;
+
+    if (strcasecmp("Source", child->key) == 0) {
+      if ((child->values_num != 1) ||
+          (child->values[0].type != OCONFIG_TYPE_STRING)) {
+        status = -1;
+      } else if (strcasecmp("proc", child->values[0].value.string) == 0) {
+        pcie_config.use_sysfs = 0;
+      } else if (strcasecmp("sysfs", child->values[0].value.string) == 0) {
+        /* Set explicitly so that "sysfs" also takes effect when the value
+         * was changed from the default earlier. */
+        pcie_config.use_sysfs = 1;
+      } else {
+        ERROR(PCIE_ERRORS_PLUGIN ": Allowed sources are 'proc' or 'sysfs'.");
+        status = -1;
+      }
+    } else if (strcasecmp("AccessDir", child->key) == 0) {
+      status = cf_util_get_string_buffer(child, pcie_config.access_dir,
+                                         sizeof(pcie_config.access_dir));
+    } else if (strcasecmp("ReportMasked", child->key) == 0) {
+      status = cf_util_get_boolean(child, &pcie_config.notif_masked);
+    } else if (strcasecmp("PersistentNotifications", child->key) == 0) {
+      status = cf_util_get_boolean(child, &pcie_config.persistent);
+    } else {
+      ERROR(PCIE_ERRORS_PLUGIN ": Invalid configuration option \"%s\".",
+            child->key);
+      pcie_config.config_error = 1;
+      break;
+    }
+
+    if (status) {
+      ERROR(PCIE_ERRORS_PLUGIN ": Invalid configuration parameter \"%s\".",
+            child->key);
+      pcie_config.config_error = 1;
+      break;
+    }
+  }
+
+  return 0;
+}
+
+/* Shutdown callback: release the device list and reset the global pointer.
+ * Always returns 0. */
+static int pcie_shutdown(void) {
+  llist_t *devices = pcie_dev_list;
+
+  pcie_dev_list = NULL;
+  pcie_clear_list(devices);
+
+  return 0;
+}
+
+/* Init callback: select the accessors, enumerate the devices and keep only
+ * the PCI Express ones.
+ * Returns 0 on success and -1 when the configuration was invalid, the list
+ * could not be allocated, the devices could not be enumerated, or no PCI
+ * Express device is left after filtering. */
+static int pcie_init(void) {
+  if (pcie_config.config_error) {
+    ERROR(PCIE_ERRORS_PLUGIN
+          ": Error in configuration, failed to init plugin.");
+    return -1;
+  }
+
+  pcie_access_config();
+  pcie_dev_list = llist_create();
+  if (pcie_dev_list == NULL) {
+    /* Report the allocation failure itself instead of letting
+     * list_devices() reject the NULL list with a misleading message. */
+    ERROR(PCIE_ERRORS_PLUGIN ": Failed to allocate device list.");
+    return -1;
+  }
+
+  if (pcie_fops.list_devices(pcie_dev_list) != 0) {
+    ERROR(PCIE_ERRORS_PLUGIN ": Failed to find devices.");
+    pcie_shutdown();
+    return -1;
+  }
+  pcie_preprocess_devices(pcie_dev_list);
+  if (llist_size(pcie_dev_list) == 0) {
+    /* No PCI Express devices were found on the system */
+    ERROR(PCIE_ERRORS_PLUGIN ": No PCIe devices found in %s",
+          pcie_config.access_dir);
+    pcie_shutdown();
+    return -1;
+  }
+
+  return 0;
+}
+
+void module_register(void) { /* registers the plugin callbacks below */
+ plugin_register_init(PCIE_ERRORS_PLUGIN, pcie_init);
+ plugin_register_complex_config(PCIE_ERRORS_PLUGIN, pcie_plugin_config);
+ plugin_register_complex_read(NULL, PCIE_ERRORS_PLUGIN, pcie_plugin_read, 0,
+ NULL);
+ plugin_register_shutdown(PCIE_ERRORS_PLUGIN, pcie_shutdown);
+}
--- /dev/null
+/**
+ * collectd - src/pcie_errors_test.c
+ *
+ * Copyright(c) 2018 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Kamil Wiatrowski <kamilx.wiatrowski@intel.com>
+ **/
+
+#include "pcie_errors.c" /* sic */
+#include "testing.h"
+
+#define TEST_DOMAIN 1
+#define TEST_BUS 5
+#define TEST_DEVICE 0xc
+#define TEST_FUNCTION 2
+#define TEST_DEVICE_STR "0001:05:0c.2"
+
+#define G_BUFF_LEN 4
+
+static notification_t last_notif;
+static char g_buff[G_BUFF_LEN];
+
+/* mock functions */
+/* Mock: keep a copy of the dispatched notification in last_notif so that
+ * tests can inspect it. Always returns ENOTSUP. */
+int plugin_dispatch_notification(const notification_t *notif) {
+  memcpy(&last_notif, notif, sizeof(last_notif));
+  return ENOTSUP;
+}
+
+/* Mock: ignore fd and offset and serve the first `count` bytes of g_buff.
+ * Returns -1 when count is 0 or larger than G_BUFF_LEN. */
+ssize_t pread(__attribute__((unused)) int fd, void *buf, size_t count,
+              __attribute__((unused)) off_t offset) {
+  const _Bool in_range = (count >= 1) && (count <= G_BUFF_LEN);
+
+  if (!in_range)
+    return -1;
+
+  memcpy(buf, g_buff, count);
+  return count;
+}
+/* end mock functions */
+
+/* pcie_clear_list() must accept NULL and must release a list that holds one
+ * device. */
+DEF_TEST(clear_dev_list) {
+  pcie_clear_list(NULL);
+
+  llist_t *list = llist_create();
+  CHECK_NOT_NULL(list);
+
+  pcie_device_t *dev = calloc(1, sizeof(*dev));
+  CHECK_NOT_NULL(dev);
+
+  llentry_t *entry = llentry_create(NULL, dev);
+  CHECK_NOT_NULL(entry);
+
+  llist_append(list, entry);
+
+  /* Every entry of the list must carry the device added above */
+  llentry_t *e = llist_head(list);
+  while (e != NULL) {
+    EXPECT_EQ_UINT64(dev, e->value);
+    e = e->next;
+  }
+
+  pcie_clear_list(list);
+
+  return 0;
+}
+
+/* pcie_add_device() must append exactly one entry carrying the given address
+ * with both capability offsets initialized to -1. */
+DEF_TEST(add_to_list) {
+  llist_t *list = llist_create();
+  CHECK_NOT_NULL(list);
+
+  const int status = pcie_add_device(list, TEST_DOMAIN, TEST_BUS, TEST_DEVICE,
+                                     TEST_FUNCTION);
+  EXPECT_EQ_INT(0, status);
+
+  llentry_t *head = llist_head(list);
+  CHECK_NOT_NULL(head);
+  OK(NULL == head->next);
+
+  pcie_device_t *added = head->value;
+  CHECK_NOT_NULL(added);
+  EXPECT_EQ_INT(TEST_DOMAIN, added->domain);
+  EXPECT_EQ_INT(TEST_BUS, added->bus);
+  EXPECT_EQ_INT(TEST_DEVICE, added->device);
+  EXPECT_EQ_INT(TEST_FUNCTION, added->function);
+  EXPECT_EQ_INT(-1, added->cap_exp);
+  EXPECT_EQ_INT(-1, added->ecap_aer);
+
+  pcie_clear_list(list);
+
+  return 0;
+}
+
+DEF_TEST(pcie_read) { /* pcie_read() and the pcie_read8/16/32 wrappers on top of the mocked pread() */
+ int ret;
+ pcie_device_t dev = {0};
+ uint32_t val = 0;
+ g_buff[0] = 4; /* the mocked pread() copies from the start of g_buff */
+ g_buff[1] = 3;
+ g_buff[2] = 2;
+ g_buff[3] = 1;
+
+ ret = pcie_read(&dev, &val, 1, 0); /* val is reused, so each read overwrites its first bytes */
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(4, val); /* expected values below assume a little-endian host */
+
+ ret = pcie_read(&dev, &val, 2, 0);
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(0x304, val);
+
+ ret = pcie_read(&dev, &val, 3, 0);
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(0x20304, val);
+
+ ret = pcie_read(&dev, &val, 4, 0);
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(0x1020304, val);
+
+ ret = pcie_read(&dev, &val, G_BUFF_LEN + 1, 0); /* the mock rejects sizes above G_BUFF_LEN */
+ EXPECT_EQ_INT(-1, ret);
+
+ pcie_fops.read = pcie_read; /* the sized wrappers go through pcie_fops.read */
+
+ uint8_t val8 = pcie_read8(&dev, 0);
+ EXPECT_EQ_INT(4, val8);
+
+ uint16_t val16 = pcie_read16(&dev, 0);
+ EXPECT_EQ_INT(0x304, val16);
+
+ uint32_t val32 = pcie_read32(&dev, 0);
+ EXPECT_EQ_INT(0x1020304, val32);
+
+ return 0;
+}
+
+/* pcie_dispatch_notification() must keep the caller-provided fields and fill
+ * in host, plugin_instance, type and type_instance. */
+DEF_TEST(dispatch_notification) {
+  pcie_device_t dev = {.fd = 0,
+                       .domain = TEST_DOMAIN,
+                       .bus = TEST_BUS,
+                       .device = TEST_DEVICE,
+                       .function = TEST_FUNCTION};
+  const cdtime_t now = cdtime();
+  notification_t n = {
+      .severity = 1, .time = now, .plugin = "pcie_errors_test", .meta = NULL};
+
+  pcie_dispatch_notification(&dev, &n, "test_type", "test_type_instance");
+
+  /* Fields set by the caller */
+  EXPECT_EQ_INT(1, last_notif.severity);
+  EXPECT_EQ_UINT64(now, last_notif.time);
+  EXPECT_EQ_STR("pcie_errors_test", last_notif.plugin);
+  OK(NULL == last_notif.meta);
+
+  /* Fields filled in by the function under test */
+  EXPECT_EQ_STR(hostname_g, last_notif.host);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR("test_type", last_notif.type);
+  EXPECT_EQ_STR("test_type_instance", last_notif.type_instance);
+
+  return 0;
+}
+
+/* Checks that pcie_access_config() installs the access callbacks selected by
+ * pcie_config.use_sysfs, and that it fills in pcie_config.access_dir with the
+ * matching default only when access_dir is empty. */
+DEF_TEST(access_config) {
+  /* use_sysfs == 0: the *_proc callbacks and the default proc directory.
+   * Callbacks are function pointers, so they are compared as pointers rather
+   * than being converted to integers for EXPECT_EQ_UINT64. */
+  pcie_config.use_sysfs = 0;
+  pcie_access_config();
+  OK(pcie_list_devices_proc == pcie_fops.list_devices);
+  OK(pcie_open_proc == pcie_fops.open);
+  OK(pcie_close == pcie_fops.close);
+  OK(pcie_read == pcie_fops.read);
+  EXPECT_EQ_STR(PCIE_DEFAULT_PROCDIR, pcie_config.access_dir);
+
+  /* A directory that is already set must not be overwritten. */
+  sstrncpy(pcie_config.access_dir, "Test", sizeof(pcie_config.access_dir));
+  pcie_access_config();
+  EXPECT_EQ_STR("Test", pcie_config.access_dir);
+
+  /* use_sysfs == 1: the *_sysfs callbacks; access_dir is still kept. */
+  pcie_config.use_sysfs = 1;
+  pcie_access_config();
+  OK(pcie_list_devices_sysfs == pcie_fops.list_devices);
+  OK(pcie_open_sysfs == pcie_fops.open);
+  OK(pcie_close == pcie_fops.close);
+  OK(pcie_read == pcie_fops.read);
+  EXPECT_EQ_STR("Test", pcie_config.access_dir);
+
+  /* An empty directory is replaced by the sysfs default. */
+  pcie_config.access_dir[0] = '\0';
+  pcie_access_config();
+  EXPECT_EQ_STR(PCIE_DEFAULT_SYSFSDIR, pcie_config.access_dir);
+
+  return 0;
+}
+
+/* Feeds option combinations that pcie_plugin_config() is expected to reject.
+ * For each one the call must still return 0 while setting
+ * pcie_config.config_error to 1. */
+DEF_TEST(plugin_config_fail) {
+  /* Key, string payload and declared value type of each rejected option. */
+  static const struct {
+    const char *key;
+    const char *value;
+    int type;
+  } bad[] = {
+      {"Sources", "procs", OCONFIG_TYPE_STRING},
+      {"Source", "procs", OCONFIG_TYPE_STRING},
+      {"Source", "proc", OCONFIG_TYPE_NUMBER},
+      {"AccessDir", "proc", OCONFIG_TYPE_NUMBER},
+  };
+  char key_buff[256] = "";
+  char value_buff[256] = "";
+  oconfig_value_t test_cfg_value = {{value_buff}, OCONFIG_TYPE_STRING};
+  oconfig_item_t test_cfg_parent = {"pcie_errors", NULL, 0, NULL, NULL, 0};
+  oconfig_item_t test_cfg = {
+      key_buff, &test_cfg_value, 1, &test_cfg_parent, NULL, 0};
+  int ret;
+
+  /* Hook the single option in as the only child of the plugin block. */
+  test_cfg_parent.children = &test_cfg;
+  test_cfg_parent.children_num = 1;
+
+  for (size_t i = 0; i < sizeof(bad) / sizeof(bad[0]); i++) {
+    sstrncpy(key_buff, bad[i].key, sizeof(key_buff));
+    sstrncpy(value_buff, bad[i].value, sizeof(value_buff));
+    test_cfg_value.type = bad[i].type;
+
+    ret = pcie_plugin_config(&test_cfg_parent);
+    EXPECT_EQ_INT(0, ret);
+    EXPECT_EQ_INT(1, pcie_config.config_error);
+    /* Reset the flag so the next case starts clean. */
+    pcie_config.config_error = 0;
+  }
+
+  return 0;
+}
+
+/* Feeds valid single-option configurations to pcie_plugin_config() and checks
+ * the pcie_config field each option is expected to set. Every call must
+ * return 0 and leave pcie_config.config_error at 0. */
+DEF_TEST(plugin_config) {
+ oconfig_item_t test_cfg_parent = {"pcie_errors", NULL, 0, NULL, NULL, 0};
+ char value_buff[256] = "proc";
+ char key_buff[256] = "source";
+ oconfig_value_t test_cfg_value = {{value_buff}, OCONFIG_TYPE_STRING};
+ oconfig_item_t test_cfg = {
+ key_buff, &test_cfg_value, 1, &test_cfg_parent, NULL, 0};
+
+ /* Make the option the only child of the plugin block. */
+ test_cfg_parent.children = &test_cfg;
+ test_cfg_parent.children_num = 1;
+
+ /* "source" = "proc" (lower-case key): use_sysfs must flip from 1 to 0. */
+ pcie_config.use_sysfs = 1;
+ int ret = pcie_plugin_config(&test_cfg_parent);
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(0, pcie_config.config_error);
+ EXPECT_EQ_INT(0, pcie_config.use_sysfs);
+
+ /* "source" = "sysfs": use_sysfs must be 1 afterwards.
+ * NOTE(review): use_sysfs is preset to 1 here, so this assertion alone does
+ * not show that "sysfs" sets the flag, only that it does not clear it. */
+ pcie_config.use_sysfs = 1;
+ sstrncpy(value_buff, "sysfs", sizeof(value_buff));
+ ret = pcie_plugin_config(&test_cfg_parent);
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(0, pcie_config.config_error);
+ EXPECT_EQ_INT(1, pcie_config.use_sysfs);
+
+ /* "AccessDir" with a string value is copied into access_dir. */
+ sstrncpy(key_buff, "AccessDir", sizeof(key_buff));
+ sstrncpy(value_buff, "some/test/value", sizeof(value_buff));
+ ret = pcie_plugin_config(&test_cfg_parent);
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(0, pcie_config.config_error);
+ EXPECT_EQ_STR("some/test/value", pcie_config.access_dir);
+
+ /* Switch the value to boolean true; the union is zeroed first so no part of
+ * the previous string pointer remains. */
+ memset(&test_cfg_value.value, 0, sizeof(test_cfg_value.value));
+ test_cfg_value.value.boolean = 1;
+ test_cfg_value.type = OCONFIG_TYPE_BOOLEAN;
+ sstrncpy(key_buff, "ReportMasked", sizeof(key_buff));
+ ret = pcie_plugin_config(&test_cfg_parent);
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(0, pcie_config.config_error);
+ EXPECT_EQ_INT(1, pcie_config.notif_masked);
+
+ /* Same boolean value under "PersistentNotifications". */
+ sstrncpy(key_buff, "PersistentNotifications", sizeof(key_buff));
+ ret = pcie_plugin_config(&test_cfg_parent);
+ EXPECT_EQ_INT(0, ret);
+ EXPECT_EQ_INT(0, pcie_config.config_error);
+ EXPECT_EQ_INT(1, pcie_config.persistent);
+
+ return 0;
+}
+
+#define BAD_TLP_SET_MSG "Correctable Error set: Bad TLP Status"
+#define BAD_TLP_CLEAR_MSG "Correctable Error cleared: Bad TLP Status"
+
+/* Exercises pcie_dispatch_correctable_errors(dev, errors, masked) with the
+ * Bad TLP bit under the combinations of pcie_config.persistent,
+ * pcie_config.notif_masked and dev.correctable_errors used below.
+ * last_notif is cleared before every dispatch whose expected notification
+ * would otherwise be indistinguishable from the one left by the previous
+ * step, so no assertion can pass on stale data. */
+DEF_TEST(dispatch_correctable_errors) {
+  pcie_device_t dev = {0, TEST_DOMAIN, TEST_BUS, TEST_DEVICE, TEST_FUNCTION,
+                       0, 0, 0, 0, 0};
+  pcie_config.notif_masked = 0;
+  pcie_config.persistent = 0;
+
+  /* New, unmasked error: a WARNING "set" notification is expected. Start
+   * from a clean slate so earlier tests cannot influence the result. */
+  memset(&last_notif, 0, sizeof(last_notif));
+  pcie_dispatch_correctable_errors(&dev, PCI_ERR_COR_BAD_TLP,
+                                   ~(PCI_ERR_COR_BAD_TLP));
+  EXPECT_EQ_INT(NOTIF_WARNING, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_CE, last_notif.type_instance);
+  EXPECT_EQ_STR(BAD_TLP_SET_MSG, last_notif.message);
+
+  /* Error already recorded in the device, not persistent: nothing is sent. */
+  memset(&last_notif, 0, sizeof(last_notif));
+  dev.correctable_errors = PCI_ERR_COR_BAD_TLP;
+  pcie_dispatch_correctable_errors(&dev, PCI_ERR_COR_BAD_TLP,
+                                   ~(PCI_ERR_COR_BAD_TLP));
+  EXPECT_EQ_STR("", last_notif.plugin_instance);
+
+  /* Persistent mode: the already recorded error is reported again. */
+  pcie_config.persistent = 1;
+  pcie_dispatch_correctable_errors(&dev, PCI_ERR_COR_BAD_TLP,
+                                   ~(PCI_ERR_COR_BAD_TLP));
+  EXPECT_EQ_INT(NOTIF_WARNING, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_CE, last_notif.type_instance);
+  EXPECT_EQ_STR(BAD_TLP_SET_MSG, last_notif.message);
+
+  /* Error is masked and notif_masked is off: nothing is sent. */
+  memset(&last_notif, 0, sizeof(last_notif));
+  pcie_dispatch_correctable_errors(&dev, PCI_ERR_COR_BAD_TLP,
+                                   PCI_ERR_COR_BAD_TLP);
+  EXPECT_EQ_STR("", last_notif.plugin_instance);
+
+  /* notif_masked on (persistent still on): the masked error is reported. */
+  pcie_config.notif_masked = 1;
+  pcie_dispatch_correctable_errors(&dev, PCI_ERR_COR_BAD_TLP,
+                                   PCI_ERR_COR_BAD_TLP);
+  EXPECT_EQ_INT(NOTIF_WARNING, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_CE, last_notif.type_instance);
+  EXPECT_EQ_STR(BAD_TLP_SET_MSG, last_notif.message);
+
+  /* Persistent off again, error still recorded in the device: nothing. */
+  pcie_config.persistent = 0;
+  memset(&last_notif, 0, sizeof(last_notif));
+  pcie_dispatch_correctable_errors(&dev, PCI_ERR_COR_BAD_TLP,
+                                   PCI_ERR_COR_BAD_TLP);
+  EXPECT_EQ_STR("", last_notif.plugin_instance);
+
+  /* Not yet recorded in the device, masked, notif_masked on: reported. */
+  dev.correctable_errors = 0;
+  pcie_dispatch_correctable_errors(&dev, PCI_ERR_COR_BAD_TLP,
+                                   PCI_ERR_COR_BAD_TLP);
+  EXPECT_EQ_INT(NOTIF_WARNING, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_CE, last_notif.type_instance);
+  EXPECT_EQ_STR(BAD_TLP_SET_MSG, last_notif.message);
+
+  /* Same error, now unmasked, notif_masked still on: reported. The expected
+   * notification is identical to the previous one, so last_notif has to be
+   * cleared first or the assertions would pass without a new dispatch. */
+  memset(&last_notif, 0, sizeof(last_notif));
+  pcie_dispatch_correctable_errors(&dev, PCI_ERR_COR_BAD_TLP,
+                                   ~(PCI_ERR_COR_BAD_TLP));
+  EXPECT_EQ_INT(NOTIF_WARNING, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_CE, last_notif.type_instance);
+  EXPECT_EQ_STR(BAD_TLP_SET_MSG, last_notif.message);
+
+  /* Error recorded in the device but no longer present: an OKAY "cleared"
+   * notification is expected. */
+  pcie_config.notif_masked = 0;
+  dev.correctable_errors = PCI_ERR_COR_BAD_TLP;
+  pcie_dispatch_correctable_errors(&dev, 0, ~(PCI_ERR_COR_BAD_TLP));
+  EXPECT_EQ_INT(NOTIF_OKAY, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_CE, last_notif.type_instance);
+  EXPECT_EQ_STR(BAD_TLP_CLEAR_MSG, last_notif.message);
+
+  return 0;
+}
+
+/* Messages expected for the Flow Control Protocol error, per severity. */
+#define FCP_NF_SET_MSG \
+ "Uncorrectable(non_fatal) Error set: Flow Control Protocol"
+#define FCP_F_SET_MSG "Uncorrectable(fatal) Error set: Flow Control Protocol"
+#define FCP_NF_CLEAR_MSG \
+ "Uncorrectable(non_fatal) Error cleared: Flow Control Protocol"
+#define FCP_F_CLEAR_MSG \
+ "Uncorrectable(fatal) Error cleared: Flow Control Protocol"
+
+/* Exercises pcie_dispatch_uncorrectable_errors() with the Flow Control
+ * Protocol bit. As the assertions below show, the second argument carries the
+ * reported errors, the third the masked errors, and the fourth selects fatal
+ * (bit set) versus non-fatal (bit clear) severity. */
+DEF_TEST(dispatch_uncorrectable_errors) {
+ pcie_device_t dev = {0, TEST_DOMAIN, TEST_BUS, TEST_DEVICE, TEST_FUNCTION,
+ 0, 0, 0, 0, 0};
+ pcie_config.notif_masked = 0;
+ pcie_config.persistent = 0;
+
+ /* New, unmasked, non-fatal error: WARNING with the non-fatal instance. */
+ pcie_dispatch_uncorrectable_errors(&dev, PCI_ERR_UNC_FCP, ~(PCI_ERR_UNC_FCP),
+ ~(PCI_ERR_UNC_FCP));
+ EXPECT_EQ_INT(NOTIF_WARNING, last_notif.severity);
+ EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+ OK(NULL == last_notif.meta);
+ EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+ EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+ EXPECT_EQ_STR(PCIE_SEV_NOFATAL, last_notif.type_instance);
+ EXPECT_EQ_STR(FCP_NF_SET_MSG, last_notif.message);
+
+ /* Same error flagged as fatal: FAILURE with the fatal instance. */
+ pcie_dispatch_uncorrectable_errors(&dev, PCI_ERR_UNC_FCP, ~(PCI_ERR_UNC_FCP),
+ PCI_ERR_UNC_FCP);
+ EXPECT_EQ_INT(NOTIF_FAILURE, last_notif.severity);
+ EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+ OK(NULL == last_notif.meta);
+ EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+ EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+ EXPECT_EQ_STR(PCIE_SEV_FATAL, last_notif.type_instance);
+ EXPECT_EQ_STR(FCP_F_SET_MSG, last_notif.message);
+
+ /* Error already recorded in the device, not persistent: nothing is sent. */
+ memset(&last_notif, 0, sizeof(last_notif));
+ dev.uncorrectable_errors = PCI_ERR_UNC_FCP;
+ pcie_dispatch_uncorrectable_errors(&dev, PCI_ERR_UNC_FCP, ~(PCI_ERR_UNC_FCP),
+ PCI_ERR_UNC_FCP);
+ EXPECT_EQ_STR("", last_notif.plugin_instance);
+
+ /* Persistent mode: the already recorded error is reported again. */
+ pcie_config.persistent = 1;
+ pcie_dispatch_uncorrectable_errors(&dev, PCI_ERR_UNC_FCP, ~(PCI_ERR_UNC_FCP),
+ PCI_ERR_UNC_FCP);
+ EXPECT_EQ_INT(NOTIF_FAILURE, last_notif.severity);
+ EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+ OK(NULL == last_notif.meta);
+ EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+ EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+ EXPECT_EQ_STR(PCIE_SEV_FATAL, last_notif.type_instance);
+ EXPECT_EQ_STR(FCP_F_SET_MSG, last_notif.message);
+
+ /* Error is masked and notif_masked is off: nothing is sent. */
+ memset(&last_notif, 0, sizeof(last_notif));
+ pcie_dispatch_uncorrectable_errors(&dev, PCI_ERR_UNC_FCP, PCI_ERR_UNC_FCP,
+ PCI_ERR_UNC_FCP);
+ EXPECT_EQ_STR("", last_notif.plugin_instance);
+
+ /* notif_masked on (persistent still on): the masked error is reported. */
+ pcie_config.notif_masked = 1;
+ pcie_dispatch_uncorrectable_errors(&dev, PCI_ERR_UNC_FCP, PCI_ERR_UNC_FCP,
+ PCI_ERR_UNC_FCP);
+ EXPECT_EQ_INT(NOTIF_FAILURE, last_notif.severity);
+ EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+ OK(NULL == last_notif.meta);
+ EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+ EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+ EXPECT_EQ_STR(PCIE_SEV_FATAL, last_notif.type_instance);
+ EXPECT_EQ_STR(FCP_F_SET_MSG, last_notif.message);
+
+ /* Persistent off and nothing recorded in the device: the error counts as
+ * new and is reported. */
+ pcie_config.persistent = 0;
+ dev.uncorrectable_errors = 0;
+ memset(&last_notif, 0, sizeof(last_notif));
+ pcie_dispatch_uncorrectable_errors(&dev, PCI_ERR_UNC_FCP, ~(PCI_ERR_UNC_FCP),
+ PCI_ERR_UNC_FCP);
+ EXPECT_EQ_INT(NOTIF_FAILURE, last_notif.severity);
+ EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+ OK(NULL == last_notif.meta);
+ EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+ EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+ EXPECT_EQ_STR(PCIE_SEV_FATAL, last_notif.type_instance);
+ EXPECT_EQ_STR(FCP_F_SET_MSG, last_notif.message);
+
+ /* Recorded error no longer present, non-fatal severity: OKAY "cleared"
+ * notification with the non-fatal instance. */
+ pcie_config.notif_masked = 0;
+ dev.uncorrectable_errors = PCI_ERR_UNC_FCP;
+ pcie_dispatch_uncorrectable_errors(&dev, 0, ~(PCI_ERR_UNC_FCP),
+ ~(PCI_ERR_UNC_FCP));
+ EXPECT_EQ_INT(NOTIF_OKAY, last_notif.severity);
+ EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+ OK(NULL == last_notif.meta);
+ EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+ EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+ EXPECT_EQ_STR(PCIE_SEV_NOFATAL, last_notif.type_instance);
+ EXPECT_EQ_STR(FCP_NF_CLEAR_MSG, last_notif.message);
+
+ /* Same clearing with fatal severity: OKAY with the fatal instance. */
+ memset(&last_notif, 0, sizeof(last_notif));
+ pcie_dispatch_uncorrectable_errors(&dev, 0, ~(PCI_ERR_UNC_FCP),
+ PCI_ERR_UNC_FCP);
+ EXPECT_EQ_INT(NOTIF_OKAY, last_notif.severity);
+ EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+ OK(NULL == last_notif.meta);
+ EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+ EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+ EXPECT_EQ_STR(PCIE_SEV_FATAL, last_notif.type_instance);
+ EXPECT_EQ_STR(FCP_F_CLEAR_MSG, last_notif.message);
+
+ return 0;
+}
+
+#define UR_SET_MSG "Device Status Error set: Unsupported Request"
+#define UR_CLEAR_MSG "Device Status Error cleared: Unsupported Request"
+#define FE_SET_MSG "Device Status Error set: Fatal Error"
+#define FE_CLEAR_MSG "Device Status Error cleared: Fatal Error"
+
+/* Exercises pcie_check_dev_status() by changing g_buff[0] (the byte this test
+ * uses to inject Device Status bits), dev.device_status and
+ * pcie_config.persistent, and checking which notification, if any, results.
+ * last_notif is cleared before every call whose expected notification equals
+ * the one left by the previous step, so no assertion can pass on stale
+ * data. */
+DEF_TEST(device_status_errors) {
+  pcie_device_t dev = {0, TEST_DOMAIN, TEST_BUS, TEST_DEVICE, TEST_FUNCTION,
+                       0, 0, 0, 0, 0};
+  pcie_config.persistent = 0;
+  g_buff[0] = (PCI_EXP_DEVSTA_URD & 0xff);
+
+  /* Unsupported Request newly set: WARNING with the non-fatal instance. */
+  memset(&last_notif, 0, sizeof(last_notif));
+  pcie_check_dev_status(&dev, 0);
+  EXPECT_EQ_INT(NOTIF_WARNING, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_NOFATAL, last_notif.type_instance);
+  EXPECT_EQ_STR(UR_SET_MSG, last_notif.message);
+
+  /* Unchanged status, not persistent: nothing is sent. */
+  memset(&last_notif, 0, sizeof(last_notif));
+  pcie_check_dev_status(&dev, 0);
+  EXPECT_EQ_STR("", last_notif.plugin_instance);
+
+  /* Persistent mode: the unchanged error is reported again. */
+  pcie_config.persistent = 1;
+  pcie_check_dev_status(&dev, 0);
+  EXPECT_EQ_INT(NOTIF_WARNING, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_NOFATAL, last_notif.type_instance);
+  EXPECT_EQ_STR(UR_SET_MSG, last_notif.message);
+
+  /* Status bit gone: OKAY "cleared" notification. */
+  g_buff[0] = 0;
+  pcie_check_dev_status(&dev, 0);
+  EXPECT_EQ_INT(NOTIF_OKAY, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_NOFATAL, last_notif.type_instance);
+  EXPECT_EQ_STR(UR_CLEAR_MSG, last_notif.message);
+
+  /* Non-persistent clearing: mark the error as recorded in the device while
+   * the injected status is still 0. The expected notification is identical
+   * to the previous one, so last_notif has to be cleared first or the
+   * assertions would pass without a new dispatch. */
+  pcie_config.persistent = 0;
+  dev.device_status = PCI_EXP_DEVSTA_URD;
+  memset(&last_notif, 0, sizeof(last_notif));
+  pcie_check_dev_status(&dev, 0);
+  EXPECT_EQ_INT(NOTIF_OKAY, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_NOFATAL, last_notif.type_instance);
+  EXPECT_EQ_STR(UR_CLEAR_MSG, last_notif.message);
+
+  /* Status still clear: nothing further is sent. */
+  memset(&last_notif, 0, sizeof(last_notif));
+  pcie_check_dev_status(&dev, 0);
+  EXPECT_EQ_STR("", last_notif.plugin_instance);
+
+  /* Fatal Error newly set: FAILURE with the fatal instance. */
+  g_buff[0] = (PCI_EXP_DEVSTA_FED & 0xff);
+  pcie_check_dev_status(&dev, 0);
+  EXPECT_EQ_INT(NOTIF_FAILURE, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_FATAL, last_notif.type_instance);
+  EXPECT_EQ_STR(FE_SET_MSG, last_notif.message);
+
+  /* Fatal Error gone: OKAY "cleared" with the fatal instance. */
+  g_buff[0] = 0;
+  pcie_check_dev_status(&dev, 0);
+  EXPECT_EQ_INT(NOTIF_OKAY, last_notif.severity);
+  EXPECT_EQ_STR(PCIE_ERRORS_PLUGIN, last_notif.plugin);
+  OK(NULL == last_notif.meta);
+  EXPECT_EQ_STR(TEST_DEVICE_STR, last_notif.plugin_instance);
+  EXPECT_EQ_STR(PCIE_ERROR, last_notif.type);
+  EXPECT_EQ_STR(PCIE_SEV_FATAL, last_notif.type_instance);
+  EXPECT_EQ_STR(FE_CLEAR_MSG, last_notif.message);
+
+  return 0;
+}
+
+/* Test entry point: runs the test cases below in a fixed order and finishes
+ * with END_TEST. The cases share global state (pcie_config, pcie_fops,
+ * last_notif, g_buff), so reordering them can change their outcome. */
+int main(void) {
+ /* Device list handling, raw reads and notification plumbing. */
+ RUN_TEST(clear_dev_list);
+ RUN_TEST(add_to_list);
+ RUN_TEST(pcie_read);
+ RUN_TEST(dispatch_notification);
+
+ /* Access-method selection and configuration parsing. */
+ RUN_TEST(access_config);
+ RUN_TEST(plugin_config_fail);
+ RUN_TEST(plugin_config);
+
+ /* Error reporting: AER correctable/uncorrectable and Device Status. */
+ RUN_TEST(dispatch_correctable_errors);
+ RUN_TEST(dispatch_uncorrectable_errors);
+ RUN_TEST(device_status_errors);
+
+ END_TEST;
+}
operations value:DERIVE:0:U
operations_per_second value:GAUGE:0:U
packets value:DERIVE:0:U
+pcie_error value:GAUGE:U:U
pending_operations value:GAUGE:0:U
percent value:GAUGE:0:100.1
percent_bytes value:GAUGE:0:100.1