diff mbox

[14/14] intel_l3_parity: Support a daemonic mode

Message ID 1379477575-2164-14-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Sept. 18, 2013, 4:12 a.m. UTC
v2: Add a comment explaining the dangers of directly accessing the DFT
register (Daniel)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 tools/Makefile.am              |   6 ++-
 tools/intel_l3_parity.c        |  46 ++++++++++++++++--
 tools/intel_l3_parity.h        |  31 ++++++++++++
 tools/intel_l3_udev_listener.c | 108 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 186 insertions(+), 5 deletions(-)
 create mode 100644 tools/intel_l3_parity.h
 create mode 100644 tools/intel_l3_udev_listener.c
diff mbox

Patch

diff --git a/tools/Makefile.am b/tools/Makefile.am
index 47bd5b3..19810cf 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -39,7 +39,7 @@  dist_bin_SCRIPTS = intel_gpu_abrt
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS)
 
 intel_dump_decode_SOURCES = 	\
 	intel_dump_decode.c
@@ -50,3 +50,7 @@  intel_error_decode_SOURCES =	\
 intel_bios_reader_SOURCES =	\
 	intel_bios_reader.c	\
 	intel_bios.h
+
+intel_l3_parity_SOURCES =	\
+	intel_l3_parity.c	\
+	intel_l3_udev_listener.c
diff --git a/tools/intel_l3_parity.c b/tools/intel_l3_parity.c
index d2ad3c9..ead8fb5 100644
--- a/tools/intel_l3_parity.c
+++ b/tools/intel_l3_parity.c
@@ -37,6 +37,14 @@ 
 #include "intel_chipset.h"
 #include "intel_gpu_tools.h"
 #include "drmtest.h"
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#if HAVE_UDEV
+#include <libudev.h>
+#include <syslog.h>
+#endif
+#include "intel_l3_parity.h"
 
 static unsigned int devid;
 /* L3 size is always a function of banks. The number of banks cannot be
@@ -157,7 +165,8 @@  static void usage(const char *name)
 		"  -r, --row=[row]			The row to act upon (default 0)\n"
 		"  -b, --bank=[bank]			The bank to act upon (default 0)\n"
 		"  -s, --subbank=[subbank]		The subbank to act upon (default 0)\n"
-		"  -w, --slice=[slice]			Which slice to act on (default: -1 [all])"
+		"  -w, --slice=[slice]			Which slice to act on (default: -1 [all])\n"
+		"    , --daemon				Run the listener (-L) as a daemon\n"
 		" ACTIONS (only 1 may be specified at a time):\n"
 		"  -h, --help				Display this help\n"
 		"  -H, --hw-info				Display the current L3 properties\n"
@@ -166,7 +175,8 @@  static void usage(const char *name)
 		"  -e, --enable				Enable row, bank, subbank (undo -d)\n"
 		"  -d, --disable=<row,bank,subbank>	Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n"
 		"  -i, --inject				[HSW only] Cause hardware to inject a row errors\n"
-		"  -u, --uninject			[HSW only] Turn off hardware error injectection (undo -i)\n",
+		"  -u, --uninject			[HSW only] Turn off hardware error injectection (undo -i)\n"
+		"  -L, --listen				Listen for uevent errors\n",
 		name);
 }
 
@@ -179,6 +189,7 @@  int main(int argc, char *argv[])
 	int fd[REAL_MAX_SLICES] = {0}, ret, i;
 	int action = '0';
 	int drm_fd = drm_open_any();
+	int daemonize = 0;
 	devid = intel_get_drm_devid(drm_fd);
 
 	if (intel_gen(devid) < 7 || IS_VALLEYVIEW(devid))
@@ -202,11 +213,18 @@  int main(int argc, char *argv[])
 		assert(lseek(fd[i], 0, SEEK_SET) == 0);
 	}
 
+	/* NB: It is potentially unsafe to read this register if the kernel is
+	 * actively using this register range, or we're running multiple
+	 * instances of this tool. Since neither of those cases should occur
+	 * (and the tool should be root only) we can safely ignore this for
+	 * now. Just be aware of this if for some reason a hang is reported
+	 * when using this tool.
+	 */
 	dft = intel_register_read(0xb038);
 
 	while (1) {
 		int c, option_index = 0;
-		static struct option long_options[] = {
+		struct option long_options[] = {
 			{ "help", no_argument, 0, 'h' },
 			{ "list", no_argument, 0, 'l' },
 			{ "clear-all", no_argument, 0, 'a' },
@@ -215,18 +233,23 @@  int main(int argc, char *argv[])
 			{ "inject", no_argument, 0, 'i' },
 			{ "uninject", no_argument, 0, 'u' },
 			{ "hw-info", no_argument, 0, 'H' },
+			{ "listen", no_argument, 0, 'L' },
 			{ "row", required_argument, 0, 'r' },
 			{ "bank", required_argument, 0, 'b' },
 			{ "subbank", required_argument, 0, 's' },
 			{ "slice", required_argument, 0, 'w' },
+			{ "daemon", no_argument, &daemonize, 1 },
 			{0, 0, 0, 0}
 		};
 
-		c = getopt_long(argc, argv, "hHr:b:s:w:aled::iu", long_options,
+		c = getopt_long(argc, argv, "hHr:b:s:w:aled::iuL", long_options,
 				&option_index);
 		if (c == -1)
 			break;
 
+		if (c == 0)
+			continue;
+
 		switch (c) {
 			case '?':
 			case 'h':
@@ -274,6 +297,7 @@  int main(int argc, char *argv[])
 			case 'a':
 			case 'l':
 			case 'e':
+			case 'L':
 				if (action != '0') {
 					fprintf(stderr, "Only one action may be specified\n");
 					exit(EXIT_FAILURE);
@@ -299,6 +323,20 @@  int main(int argc, char *argv[])
 			printf("warning: overwriting existing injections. This is very dangerous.\n");
 	}
 
+	/* Daemon doesn't work like the other commands */
+	if (action == 'L') {
+		struct l3_parity par;
+		struct l3_location loc;
+		if (daemonize) {
+			assert(daemon(0, 0) == 0);
+			openlog(argv[0], LOG_CONS | LOG_PID, LOG_USER);
+		}
+		memset(&par, 0, sizeof(par));
+		assert(l3_uevent_setup(&par) == 0);
+		assert(l3_listen(&par, daemonize == 1, &loc) == 0);
+		exit(EXIT_SUCCESS);
+	}
+
 	if (action == 'l')
 		decode_dft(dft);
 
diff --git a/tools/intel_l3_parity.h b/tools/intel_l3_parity.h
new file mode 100644
index 0000000..65697c4
--- /dev/null
+++ b/tools/intel_l3_parity.h
@@ -0,0 +1,35 @@ 
+#ifndef INTEL_L3_PARITY_H_
+#define INTEL_L3_PARITY_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/select.h>
+
+/* State of the uevent listener, filled in by l3_uevent_setup(). */
+struct l3_parity {
+	struct udev *udev;
+	struct udev_monitor *uevent_monitor;
+	int fd;		/* descriptor of uevent_monitor */
+	fd_set fdset;	/* set holding only fd, handed to select() */
+};
+
+/* Location of an L3 parity error as reported by l3_listen(). */
+struct l3_location {
+	uint8_t slice;
+	uint16_t row;
+	uint8_t bank;
+	uint8_t subbank;
+};
+
+#if HAVE_UDEV
+int l3_uevent_setup(struct l3_parity *par);
+/* Listens (blocks) for an l3 parity event. Returns the location of the error. */
+int l3_listen(struct l3_parity *par, bool daemon, struct l3_location *loc);
+#define l3_uevent_teardown(par) {}
+#else
+/* Without udev both calls fail; the stubs take the same arguments as the real functions. */
+#define l3_uevent_setup(par) (-1)
+#define l3_listen(par, daemon, loc) (-1)
+#endif
+
+#endif
diff --git a/tools/intel_l3_udev_listener.c b/tools/intel_l3_udev_listener.c
new file mode 100644
index 0000000..c50820c
--- /dev/null
+++ b/tools/intel_l3_udev_listener.c
@@ -0,0 +1,149 @@ 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if HAVE_UDEV
+#include <libudev.h>
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <syslog.h>
+#include <sys/select.h>
+#include "i915_drm.h"
+#include "intel_l3_parity.h"
+
+#ifndef I915_L3_PARITY_UEVENT
+#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
+#endif
+
+/*
+ * Read an integer property from a uevent. A property that is missing from the
+ * event yields 0 instead of handing a NULL string to the number parser.
+ */
+static int l3_uevent_int_property(struct udev_device *dev, const char *key)
+{
+	const char *value = udev_device_get_property_value(dev, key);
+
+	return value ? (int)strtol(value, NULL, 10) : 0;
+}
+
+/*
+ * Create a udev monitor for "drm"/"drm_minor" uevents and record it in @par.
+ *
+ * Returns 0 on success. On failure a negative value is returned, @par is not
+ * modified and every udev object created here has been released again.
+ */
+int l3_uevent_setup(struct l3_parity *par)
+{
+	struct udev *udev;
+	struct udev_monitor *uevent_monitor;
+	int fd, ret = -1;
+
+	udev = udev_new();
+	if (!udev)
+		return -1;
+
+	uevent_monitor = udev_monitor_new_from_netlink(udev, "udev");
+	if (!uevent_monitor)
+		goto err_udev;
+
+	ret = udev_monitor_filter_add_match_subsystem_devtype(uevent_monitor, "drm", "drm_minor");
+	if (ret < 0)
+		goto err_monitor;
+
+	ret = udev_monitor_enable_receiving(uevent_monitor);
+	if (ret < 0)
+		goto err_monitor;
+
+	/* The descriptor is used with select() later on, so it must fit an fd_set. */
+	fd = udev_monitor_get_fd(uevent_monitor);
+	if (fd < 0 || fd >= FD_SETSIZE) {
+		ret = -1;
+		goto err_monitor;
+	}
+
+	par->udev = udev;
+	par->uevent_monitor = uevent_monitor;
+	par->fd = fd;
+	FD_ZERO(&par->fdset);
+	FD_SET(fd, &par->fdset);
+	return 0;
+
+err_monitor:
+	udev_monitor_unref(uevent_monitor);
+err_udev:
+	udev_unref(udev);
+	return ret;
+}
+
+/*
+ * Block until the monitor in @par delivers an L3 parity error uevent.
+ *
+ * The location of the error is stored in @loc and a hint on how to disable the
+ * affected row is emitted: via syslog() when @daemon is true, on stderr
+ * otherwise. With @daemon set the function keeps listening and only returns
+ * on failure; without it the function returns 0 after the first parity event.
+ *
+ * On failure the result of select() is returned, or -1 when the uevent cannot
+ * be received from the monitor.
+ */
+int l3_listen(struct l3_parity *par, bool daemon, struct l3_location *loc)
+{
+	for (;;) {
+		struct udev_device *udev_dev;
+		const char *parity_status;
+		char err_msg[256];
+		int ret;
+
+		/* select() overwrites the set, so rebuild it before every wait. */
+		FD_ZERO(&par->fdset);
+		FD_SET(par->fd, &par->fdset);
+
+		ret = select(par->fd + 1, &par->fdset, NULL, NULL, NULL);
+		if (ret < 0 && errno == EINTR)
+			continue;
+		/* Number of bits set is returned, must be >= 1 */
+		if (ret <= 0)
+			return ret;
+
+		assert(FD_ISSET(par->fd, &par->fdset));
+
+		udev_dev = udev_monitor_receive_device(par->uevent_monitor);
+		if (!udev_dev)
+			return -1;
+
+		/* Events without the parity property yield NULL here; skip them. */
+		parity_status = udev_device_get_property_value(udev_dev, I915_L3_PARITY_UEVENT);
+		if (!parity_status || strncmp(parity_status, "1", 1)) {
+			udev_device_unref(udev_dev);
+			continue;
+		}
+
+		loc->slice = l3_uevent_int_property(udev_dev, "SLICE");
+		loc->row = l3_uevent_int_property(udev_dev, "ROW");
+		loc->bank = l3_uevent_int_property(udev_dev, "BANK");
+		loc->subbank = l3_uevent_int_property(udev_dev, "SUBBANK");
+
+		udev_device_unref(udev_dev);
+
+		/* A stack buffer is ample for this message and cannot leak. */
+		snprintf(err_msg, sizeof(err_msg), "Parity error detected on: %d,%d,%d,%d. "
+				"Try to run intel_l3_parity -r %d -b %d -s %d -w %d -d",
+				loc->slice, loc->row, loc->bank, loc->subbank,
+				loc->row, loc->bank, loc->subbank, loc->slice);
+		if (daemon) {
+			syslog(LOG_INFO, "%s\n", err_msg);
+			continue;
+		}
+
+		fprintf(stderr, "%s\n", err_msg);
+		return 0;
+	}
+}
+#endif