@@ -1140,6 +1140,34 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
static const struct file_operations userfaultfd_fops;
+/* Open-coded version of anon_inode_getfd() to setup FMODE_PWRITE */
+static int userfaultfd_getfd(const char *name, const struct file_operations *fops,
+ void *priv, int flags)
+{
+ int error, fd;
+ struct file *file;
+
+ error = get_unused_fd_flags(flags);
+ if (error < 0)
+ return error;
+ fd = error;
+
+ file = anon_inode_getfile(name, fops, priv, flags);
+
+ if (IS_ERR(file)) {
+ error = PTR_ERR(file);
+ goto err_put_unused_fd;
+ }
+ file->f_mode |= FMODE_PWRITE;
+ fd_install(fd, file);
+
+ return fd;
+
+err_put_unused_fd:
+ put_unused_fd(fd);
+ return error;
+}
+
static int resolve_userfault_fork(struct userfaultfd_ctx *ctx,
struct userfaultfd_ctx *new,
struct uffd_msg *msg)
@@ -1161,7 +1189,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *ctx,
task_unlock(current);
}
- fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, new,
+ fd = userfaultfd_getfd("[userfaultfd]", &userfaultfd_fops, new,
O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS));
if (files != NULL) {
@@ -1496,6 +1524,69 @@ static __always_inline int validate_range(struct mm_struct *mm,
return 0;
}
+ssize_t userfaultfd_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct userfaultfd_wake_range range;
+ struct userfaultfd_ctx *ctx = file->private_data;
+ size_t len = iov_iter_count(from);
+ __u64 dst = iocb->ki_pos & PAGE_MASK;
+ unsigned long mode = iocb->ki_pos & ~PAGE_MASK;
+ bool zeropage;
+ __s64 ret;
+
+ BUG_ON(len == 0);
+
+ zeropage = mode & UFFDIO_WRITE_MODE_ZEROPAGE;
+
+ ret = -EINVAL;
+ if (mode & ~(UFFDIO_WRITE_MODE_DONTWAKE | UFFDIO_WRITE_MODE_WP |
+ UFFDIO_WRITE_MODE_ZEROPAGE))
+ goto out;
+
+ mode = mode & (UFFDIO_WRITE_MODE_DONTWAKE | UFFDIO_WRITE_MODE_WP);
+
+ /*
+ * Keep compatibility with zeropage ioctl, which does not allow
+ * write-protect and dontwake.
+ */
+ if (zeropage &&
+ (mode & (UFFDIO_WRITE_MODE_DONTWAKE | UFFDIO_WRITE_MODE_WP)) ==
+ (UFFDIO_WRITE_MODE_DONTWAKE | UFFDIO_WRITE_MODE_WP))
+ goto out;
+
+ ret = -EAGAIN;
+ if (READ_ONCE(ctx->mmap_changing))
+ goto out;
+
+ ret = validate_range(ctx->mm, &dst, len);
+ if (ret)
+ goto out;
+
+ if (mmget_not_zero(ctx->mm)) {
+ if (zeropage)
+ ret = mfill_zeropage(ctx->mm, dst, from,
+ &ctx->mmap_changing);
+ else
+ ret = mcopy_atomic(ctx->mm, dst, from,
+ &ctx->mmap_changing, mode);
+ mmput(ctx->mm);
+ } else {
+ return -ESRCH;
+ }
+ if (ret < 0)
+ goto out;
+
+ /* len == 0 would wake all */
+ range.len = ret;
+ if (!(mode & UFFDIO_COPY_MODE_DONTWAKE)) {
+ range.start = dst;
+ wake_userfault(ctx, &range);
+ }
+out:
+ return ret;
+}
+
static inline bool vma_can_userfault(struct vm_area_struct *vma,
unsigned long vm_flags)
{
@@ -2197,6 +2288,7 @@ static const struct file_operations userfaultfd_fops = {
.release = userfaultfd_release,
.poll = userfaultfd_poll,
.read_iter = userfaultfd_read_iter,
+ .write_iter = userfaultfd_write_iter,
.unlocked_ioctl = userfaultfd_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
@@ -2248,7 +2340,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
ctx->files = get_files_struct(current);
- fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
+ fd = userfaultfd_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
if (fd < 0) {
mmdrop(ctx->mm);
@@ -28,7 +28,8 @@
UFFD_FEATURE_MISSING_SHMEM | \
UFFD_FEATURE_SIGBUS | \
UFFD_FEATURE_THREAD_ID | \
- UFFD_FEATURE_POLL)
+ UFFD_FEATURE_POLL | \
+ UFFD_FEATURE_WRITE)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
@@ -177,6 +178,9 @@ struct uffdio_api {
* UFFD_FEATURE_POLL polls upon page-fault if the feature is requested
* instead of descheduling. This feature should only be enabled for
* low-latency handlers and when CPUs are not overcomitted.
+ *
+ * UFFD_FEATURE_WRITE allows to use the write interface for copy and
+ * zeroing of pages in addition to the ioctl interface.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -188,6 +192,7 @@ struct uffdio_api {
#define UFFD_FEATURE_SIGBUS (1<<7)
#define UFFD_FEATURE_THREAD_ID (1<<8)
#define UFFD_FEATURE_POLL (1<<9)
+#define UFFD_FEATURE_WRITE (1<<10)
__u64 features;
__u64 ioctls;
@@ -264,4 +269,11 @@ struct uffdio_writeprotect {
__u64 mode;
};
+/*
+ * Write modes to be use with UFFDIO_SET_WRITE_MODE ioctl.
+ */
+#define UFFDIO_WRITE_MODE_DONTWAKE UFFDIO_COPY_MODE_DONTWAKE
+#define UFFDIO_WRITE_MODE_WP UFFDIO_COPY_MODE_WP
+#define UFFDIO_WRITE_MODE_ZEROPAGE ((__u64)1<<2)
+
#endif /* _LINUX_USERFAULTFD_H */