Message ID | c71b2091-a86e-cc81-056d-de2f1e839f50@kernel.dk (mailing list archive) |
---|---|
State | New, archived |
Series | [v2] eventfd: convert to f_op->read_iter() |
On Fri, May 01, 2020 at 11:18:05AM -0600, Jens Axboe wrote:

> -        if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
> +        if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)

*whoa*

It is correct, but only because here res > 0 <=> res == 8.
And that's not trivial at first glance.

Please, turn that into something like

        if (iov_iter_count(to) < sizeof(ucnt))
                return -EINVAL;
        spin_lock_irq(&ctx->wqh.lock);
        if (!ctx->count) {
                if (unlikely(file->f_flags & O_NONBLOCK)) {
                        spin_unlock_irq(&ctx->wqh.lock);
                        return -EAGAIN;
                }
                __add_wait_queue(&ctx->wqh, &wait);
                for (;;) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        if (ctx->count)
                                break;
                        if (signal_pending(current)) {
                                spin_unlock_irq(&ctx->wqh.lock);
                                return -ERESTARTSYS;
                        }
                        spin_unlock_irq(&ctx->wqh.lock);
                        schedule();
                        spin_lock_irq(&ctx->wqh.lock);
                }
                __remove_wait_queue(&ctx->wqh, &wait);
                __set_current_state(TASK_RUNNING);
        }
        eventfd_ctx_do_read(ctx, &ucnt);
        if (waitqueue_active(&ctx->wqh))
                wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
        spin_unlock_irq(&ctx->wqh.lock);
        if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
                return -EFAULT;
        return sizeof(ucnt);
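For readers not steeped in the eventfd ABI: the reason res > 0 implies res == 8 here is that eventfd only ever transfers a whole __u64. Reads with a buffer shorter than 8 bytes are rejected with EINVAL (see eventfd(2)), so a successful read always returns exactly sizeof(__u64). A small, hypothetical userspace demo of that behaviour (not part of the patch):

```c
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
        int efd = eventfd(1, 0);        /* counter starts at 1 */
        char small[4];
        uint64_t val;

        /* Reads shorter than sizeof(__u64) are rejected outright... */
        if (read(efd, small, sizeof(small)) < 0)
                printf("short read: %s (expect EINVAL)\n", strerror(errno));

        /* ...so a successful read always transfers exactly 8 bytes. */
        ssize_t n = read(efd, &val, sizeof(val));
        printf("read %zd bytes, value %llu\n", n, (unsigned long long)val);

        close(efd);
        return 0;
}
```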
On 5/1/20 11:43 AM, Al Viro wrote:
> On Fri, May 01, 2020 at 11:18:05AM -0600, Jens Axboe wrote:
>
>> -        if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
>> +        if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)
>
> *whoa*
>
> It is correct, but only because here res > 0 <=> res == 8.
> And that's not trivial at first glance.
>
> Please, turn that into something like

Looks good to me, just one minor edit:

>         if (iov_iter_count(to) < sizeof(ucnt))
>                 return -EINVAL;
>         spin_lock_irq(&ctx->wqh.lock);
>         if (!ctx->count) {
>                 if (unlikely(file->f_flags & O_NONBLOCK)) {
>                         spin_unlock_irq(&ctx->wqh.lock);
>                         return -EAGAIN;
>                 }
>                 __add_wait_queue(&ctx->wqh, &wait);
>                 for (;;) {
>                         set_current_state(TASK_INTERRUPTIBLE);
>                         if (ctx->count)
>                                 break;
>                         if (signal_pending(current)) {
>                                 spin_unlock_irq(&ctx->wqh.lock);
>                                 return -ERESTARTSYS;
>                         }

We need to remove the waitqueue entry and re-set the task state here. I'll run
a sanity check on that and send out a v3.
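The fix Jens is describing would presumably look something like the sketch below (an illustration of the point, not the actual v3): the signal_pending() exit has to undo what __add_wait_queue() and set_current_state() did, mirroring the cleanup that already runs after the loop, before dropping the lock and returning.

```c
        /* Sketch only: the corrected blocking loop inside eventfd_read(). */
        __add_wait_queue(&ctx->wqh, &wait);
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (ctx->count)
                        break;
                if (signal_pending(current)) {
                        __remove_wait_queue(&ctx->wqh, &wait);  /* drop our entry */
                        __set_current_state(TASK_RUNNING);      /* undo INTERRUPTIBLE */
                        spin_unlock_irq(&ctx->wqh.lock);
                        return -ERESTARTSYS;
                }
                spin_unlock_irq(&ctx->wqh.lock);
                schedule();
                spin_lock_irq(&ctx->wqh.lock);
        }
        __remove_wait_queue(&ctx->wqh, &wait);
        __set_current_state(TASK_RUNNING);
```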
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 78e41c7c3d05..d590c2141d39 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -216,10 +216,11 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
 }
 EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-                            loff_t *ppos)
+static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *iov)
 {
+        struct file *file = iocb->ki_filp;
         struct eventfd_ctx *ctx = file->private_data;
+        size_t count = iov_iter_count(iov);
         ssize_t res;
         __u64 ucnt = 0;
         DECLARE_WAITQUEUE(wait, current);
@@ -231,7 +232,8 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
         res = -EAGAIN;
         if (ctx->count > 0)
                 res = sizeof(ucnt);
-        else if (!(file->f_flags & O_NONBLOCK)) {
+        else if (!(file->f_flags & O_NONBLOCK) &&
+                 !(iocb->ki_flags & IOCB_NOWAIT)) {
                 __add_wait_queue(&ctx->wqh, &wait);
                 for (;;) {
                         set_current_state(TASK_INTERRUPTIBLE);
@@ -257,7 +259,7 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
         }
         spin_unlock_irq(&ctx->wqh.lock);
 
-        if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
+        if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)
                 return -EFAULT;
 
         return res;
@@ -329,7 +331,7 @@ static const struct file_operations eventfd_fops = {
 #endif
         .release        = eventfd_release,
         .poll           = eventfd_poll,
-        .read           = eventfd_read,
+        .read_iter      = eventfd_read,
         .write          = eventfd_write,
         .llseek         = noop_llseek,
 };
@@ -427,8 +429,17 @@ static int do_eventfd(unsigned int count, int flags)
 
         fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
                               O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
-        if (fd < 0)
+        if (fd < 0) {
                 eventfd_free_ctx(ctx);
+        } else {
+                struct file *file;
+
+                file = fget(fd);
+                if (file) {
+                        file->f_mode |= FMODE_NOWAIT;
+                        fput(file);
+                }
+        }
 
         return fd;
 }
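The FMODE_NOWAIT hunk is what makes the new IOCB_NOWAIT check reachable from the regular syscall path: the VFS only sets IOCB_NOWAIT on a kiocb when the file advertises FMODE_NOWAIT, and otherwise rejects RWF_NOWAIT with EOPNOTSUPP. One way to observe the change from userspace, assuming a kernel with this patch applied and a glibc recent enough to expose preadv2() and RWF_NOWAIT, is a hypothetical test like:

```c
/* Hypothetical demo: RWF_NOWAIT on an eventfd. Without this patch the call
 * should fail with EOPNOTSUPP (no FMODE_NOWAIT / no ->read_iter); with it,
 * an empty counter should produce EAGAIN instead of blocking. */
#define _GNU_SOURCE
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
        int efd = eventfd(0, 0);        /* counter starts at 0 */
        uint64_t val;
        struct iovec iov = { .iov_base = &val, .iov_len = sizeof(val) };

        /* offset -1: behave like readv() on this non-seekable fd */
        ssize_t n = preadv2(efd, &iov, 1, -1, RWF_NOWAIT);
        if (n < 0)
                printf("preadv2(RWF_NOWAIT): %s\n", strerror(errno));
        else
                printf("read %zd bytes, value %llu\n", n, (unsigned long long)val);

        close(efd);
        return 0;
}
```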
eventfd is using ->read() as its file_operations read handler, but this
prevents passing in information about whether a given IO operation is
blocking or not. We can only use the file flags for that. To support
async (-EAGAIN/poll based) retries for io_uring, we need ->read_iter()
support. Convert eventfd to using ->read_iter().

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
Since v1:
- Add FMODE_NOWAIT to the eventfd file
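The io_uring motivation in the commit message can be exercised with an IORING_OP_READV against an eventfd, which is exactly the path that requires ->read_iter(). A hypothetical sketch using liburing (assumes liburing is installed; error handling trimmed; build with -luring):

```c
/* Sketch: read an eventfd through io_uring's readv path. With ->read_iter()
 * in place, io_uring can issue the read non-blockingly and retry it via poll
 * rather than parking a worker thread on a blocking ->read(). */
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <sys/uio.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
        int efd = eventfd(0, 0);
        uint64_t val = 0, one = 1;
        struct iovec iov = { .iov_base = &val, .iov_len = sizeof(val) };
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;

        io_uring_queue_init(8, &ring, 0);

        /* Queue the read first; it can only complete once the counter is non-zero. */
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_readv(sqe, efd, &iov, 1, 0);
        io_uring_submit(&ring);

        /* Signal the eventfd so the queued read completes. */
        write(efd, &one, sizeof(one));

        io_uring_wait_cqe(&ring, &cqe);
        printf("cqe res=%d value=%llu\n", cqe->res, (unsigned long long)val);
        io_uring_cqe_seen(&ring, cqe);

        io_uring_queue_exit(&ring);
        close(efd);
        return 0;
}
```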