@@ -122,7 +122,9 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
if (len && (start > uwq->msg.arg.pagefault.address ||
start + len <= uwq->msg.arg.pagefault.address))
goto out;
- WRITE_ONCE(uwq->waken, true);
+
+ smp_store_mb(uwq->waken, true);
+
/*
* The Program-Order guarantees provided by the scheduler
* ensure uwq->waken is visible before the task is woken.
@@ -377,6 +379,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
vm_fault_t ret = VM_FAULT_SIGBUS;
bool must_wait;
long blocking_state;
+ bool poll;
/*
* We don't do userfault handling for the final child pid update.
@@ -410,6 +413,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
if (ctx->features & UFFD_FEATURE_SIGBUS)
goto out;
+ poll = ctx->features & UFFD_FEATURE_POLL;
+
/*
* If it's already released don't get it. This avoids to loop
* in __get_user_pages if userfaultfd_release waits on the
@@ -495,7 +500,10 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* following the spin_unlock to happen before the list_add in
* __add_wait_queue.
*/
- set_current_state(blocking_state);
+
+ if (!poll)
+ set_current_state(blocking_state);
+
spin_unlock_irq(&ctx->fault_pending_wqh.lock);
if (!is_vm_hugetlb_page(vmf->vma))
@@ -509,10 +517,18 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
if (likely(must_wait && !READ_ONCE(ctx->released))) {
wake_up_poll(&ctx->fd_wqh, EPOLLIN);
- schedule();
+ if (poll) {
+ while (!READ_ONCE(uwq.waken) && !READ_ONCE(ctx->released) &&
+ !signal_pending(current)) {
+ cpu_relax();
+ cond_resched();
+ }
+ } else
+ schedule();
}
- __set_current_state(TASK_RUNNING);
+ if (!poll)
+ __set_current_state(TASK_RUNNING);
/*
* Here we race with the list_del; list_add in
@@ -27,7 +27,9 @@
UFFD_FEATURE_MISSING_HUGETLBFS | \
UFFD_FEATURE_MISSING_SHMEM | \
UFFD_FEATURE_SIGBUS | \
- UFFD_FEATURE_THREAD_ID)
+ UFFD_FEATURE_THREAD_ID | \
+ UFFD_FEATURE_POLL)
+
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -171,6 +173,10 @@ struct uffdio_api {
*
* UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
* be returned, if feature is not requested 0 will be returned.
+ *
+ * UFFD_FEATURE_POLL if the feature is requested, the faulting task polls
+ * for the page-fault to be resolved instead of descheduling. This feature
+ * should only be enabled for low-latency handlers and when CPUs are not overcommitted.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -181,6 +187,7 @@ struct uffdio_api {
#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
#define UFFD_FEATURE_SIGBUS (1<<7)
#define UFFD_FEATURE_THREAD_ID (1<<8)
+#define UFFD_FEATURE_POLL (1<<9)
__u64 features;
__u64 ioctls;