@@ -242,3 +242,67 @@ SYSCALL_DEFINE1(eventfd, unsigned int, count)
return sys_eventfd2(count, 0);
}
+/*
+ * Wait-queue callback installed for an eventfd_notifier.  The poll event
+ * mask arrives through @key and selects which of the client's notifier
+ * ops are invoked.  Always returns 0.
+ */
+static int eventfd_notifier_wakeup(wait_queue_t *wait, unsigned mode,
+				   int sync, void *key)
+{
+	struct eventfd_notifier *notifier =
+		container_of(wait, struct eventfd_notifier, wait);
+	unsigned long events = (unsigned long)key;
+
+	/* The POLLIN wake_up is called with interrupts disabled. */
+	if (events & POLLIN)
+		notifier->ops->signal(notifier);
+
+	if (!(events & POLLHUP))
+		return 0;
+
+	/*
+	 * The POLLHUP is called unlocked, so it theoretically should
+	 * be safe to remove ourselves from the wqh using the locked
+	 * variant of remove_wait_queue()
+	 */
+	remove_wait_queue(notifier->wqh, &notifier->wait);
+	notifier->ops->release(notifier);
+
+	return 0;
+}
+
+/*
+ * poll_table enqueue hook: records the wait queue head handed to us and
+ * adds the notifier's wait entry to it.
+ */
+static void eventfd_notifier_ptable_enqueue(struct file *file,
+					    wait_queue_head_t *wqh,
+					    poll_table *pt)
+{
+	struct eventfd_notifier *notifier =
+		container_of(pt, struct eventfd_notifier, pt);
+
+	/* Remember the head so the entry can be removed from it later */
+	notifier->wqh = wqh;
+	add_wait_queue(notifier->wqh, &notifier->wait);
+}
+
+/*
+ * Attach @en to the eventfd behind @file.
+ *
+ * Returns -EINVAL if @file is not an eventfd; otherwise 1 if the initial
+ * poll reports POLLIN and 0 if it does not.
+ */
+int eventfd_notifier_register(struct file *file, struct eventfd_notifier *en)
+{
+	unsigned int mask;
+
+	if (file->f_op != &eventfd_fops)
+		return -EINVAL;
+
+	/*
+	 * Install our own custom wake-up handling so we are notified via
+	 * a callback whenever someone signals the underlying eventfd
+	 */
+	init_waitqueue_func_entry(&en->wait, eventfd_notifier_wakeup);
+	init_poll_funcptr(&en->pt, eventfd_notifier_ptable_enqueue);
+
+	/* Hand our poll_table to the file's ->poll() and sample the events */
+	mask = file->f_op->poll(file, &en->pt);
+	if (mask & POLLIN)
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(eventfd_notifier_register);
+
+/*
+ * Detach @en from the wait queue head recorded when it was enqueued.
+ *
+ * A notifier zeroed by eventfd_notifier_init() that was never enqueued
+ * (for example because eventfd_notifier_register() returned -EINVAL) has
+ * a NULL wqh, so there is nothing to remove.  The recorded head is
+ * cleared afterwards so that a repeated call is a no-op.
+ */
+void eventfd_notifier_unregister(struct eventfd_notifier *en)
+{
+	if (!en->wqh)
+		return;
+
+	remove_wait_queue(en->wqh, &en->wait);
+	en->wqh = NULL;
+}
+EXPORT_SYMBOL_GPL(eventfd_notifier_unregister);
@@ -8,6 +8,32 @@
#ifndef _LINUX_EVENTFD_H
#define _LINUX_EVENTFD_H
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/list.h>
+
+struct eventfd_notifier;
+
+struct eventfd_notifier_ops { /* client callbacks run by the notifier wakeup handler */
+ void (*signal)(struct eventfd_notifier *en); /* called on a POLLIN wakeup */
+ void (*release)(struct eventfd_notifier *en); /* called on a POLLHUP wakeup, after removal from the wait queue */
+};
+
+struct eventfd_notifier { /* per-client registration state; intended to be embedded in the client */
+ poll_table pt; /* passed to the eventfd's ->poll() at register time */
+ wait_queue_head_t *wqh; /* head the wait entry was added to */
+ wait_queue_t wait; /* wait entry whose callback is the notifier wakeup handler */
+ const struct eventfd_notifier_ops *ops; /* client callbacks, set by eventfd_notifier_init() */
+};
+
+/**
+ * eventfd_notifier_init - prepare a notifier for use
+ * @en:  notifier to initialize; every field is zeroed first
+ * @ops: callback table stored in the notifier
+ */
+static inline void eventfd_notifier_init(struct eventfd_notifier *en,
+					 const struct eventfd_notifier_ops *ops)
+{
+	memset(en, 0, sizeof(*en));
+	en->ops = ops;
+}
+
#ifdef CONFIG_EVENTFD
/* For O_CLOEXEC and O_NONBLOCK */
@@ -29,12 +55,19 @@
struct file *eventfd_fget(int fd);
int eventfd_signal(struct file *file, int n);
+int eventfd_notifier_register(struct file *file, struct eventfd_notifier *en); /* -EINVAL if file is not an eventfd; else 1 if POLLIN is already set, 0 otherwise */
+void eventfd_notifier_unregister(struct eventfd_notifier *en); /* removes en->wait from en->wqh */
#else /* CONFIG_EVENTFD */
#define eventfd_fget(fd) ERR_PTR(-ENOSYS)
static inline int eventfd_signal(struct file *file, int n)
{ return 0; }
+/* Built without CONFIG_EVENTFD: registration always fails with -ENOSYS */
+static inline int
+eventfd_notifier_register(struct file *file, struct eventfd_notifier *en)
+{
+	return -ENOSYS;
+}
+/*
+ * Built without CONFIG_EVENTFD: registration always fails, so there is
+ * never anything to remove and this is a no-op.  It returns void so the
+ * signature matches the prototype used when CONFIG_EVENTFD is enabled.
+ */
+static inline void eventfd_notifier_unregister(struct eventfd_notifier *en)
+{ }
#endif /* CONFIG_EVENTFD */
Users that want to register for signal notifications with eventfd have several choices today: They can do a standard sleep+wakeup against a ->read(), or they can provide their own wakeup handling using the wait-queue callback mechanism coupled with the eventfd->poll() interface. In fact, Davide recently published a patch that allows eventfd to transmit a "release" event when the underlying eventfd is closed via a POLLHUP wakeup. This type of event is extremely useful for in-kernel notification clients. However the wait-queue based notification interface alone is not sufficient to use this new information race-free since it requires operating lockless and referenceless. We need to track some additional data that is independent of the file* pointer, since we need f_ops->release() to still function. Therefore, this patch lays the groundwork to try and fix these issues. It accomplishes this by abstracting eventfd's wait-queue based notification interface behind eventfd specific register()/unregister() verbs. It also provides an eventfd specific object (eventfd_notifier) that is intended to be embedded in the client, but used by eventfd to track proper state. We will use this interface later in the series to fix the current races. Signed-off-by: Gregory Haskins <ghaskins@novell.com> CC: Davide Libenzi <davidel@xmailserver.org> CC: Michael S. Tsirkin <mst@redhat.com> --- fs/eventfd.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/eventfd.h | 33 ++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 0 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html