Message ID | 6b29f015-bd7c-0601-cf94-2c077285b933@kernel.dk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4] eventfd: convert to f_op->read_iter() | expand |
On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote: > + flags &= EFD_SHARED_FCNTL_FLAGS; > + flags |= O_RDWR; > + fd = get_unused_fd_flags(flags); > if (fd < 0) > - eventfd_free_ctx(ctx); > + goto err; > + > + file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); > + if (IS_ERR(file)) { > + put_unused_fd(fd); > + fd = PTR_ERR(file); > + goto err; > + } > > + file->f_mode |= FMODE_NOWAIT; > + fd_install(fd, file); > + return fd; > +err: > + eventfd_free_ctx(ctx); > return fd; > } Looks sane... I can take it via vfs.git, or leave it for you if you have other stuff in the same area...
On 5/1/20 5:12 PM, Al Viro wrote: > On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote: >> + flags &= EFD_SHARED_FCNTL_FLAGS; >> + flags |= O_RDWR; >> + fd = get_unused_fd_flags(flags); >> if (fd < 0) >> - eventfd_free_ctx(ctx); >> + goto err; >> + >> + file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); >> + if (IS_ERR(file)) { >> + put_unused_fd(fd); >> + fd = PTR_ERR(file); >> + goto err; >> + } >> >> + file->f_mode |= FMODE_NOWAIT; >> + fd_install(fd, file); >> + return fd; >> +err: >> + eventfd_free_ctx(ctx); >> return fd; >> } > > Looks sane... I can take it via vfs.git, or leave it for you if you > have other stuff in the same area... Would be great if you can queue it up in vfs.git, thanks! Don't have anything else that'd conflict with this.
On Fri, May 01, 2020 at 05:54:09PM -0600, Jens Axboe wrote: > On 5/1/20 5:12 PM, Al Viro wrote: > > On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote: > >> + flags &= EFD_SHARED_FCNTL_FLAGS; > >> + flags |= O_RDWR; > >> + fd = get_unused_fd_flags(flags); > >> if (fd < 0) > >> - eventfd_free_ctx(ctx); > >> + goto err; > >> + > >> + file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); > >> + if (IS_ERR(file)) { > >> + put_unused_fd(fd); > >> + fd = PTR_ERR(file); > >> + goto err; > >> + } > >> > >> + file->f_mode |= FMODE_NOWAIT; > >> + fd_install(fd, file); > >> + return fd; > >> +err: > >> + eventfd_free_ctx(ctx); > >> return fd; > >> } > > > > Looks sane... I can take it via vfs.git, or leave it for you if you > > have other stuff in the same area... > > Would be great if you can queue it up in vfs.git, thanks! Don't have > anything else that'd conflict with this. Applied; BTW, what happens if ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL); fails? Quitely succeed with BS value (-ENOSPC/-ENOMEM) shown by eventfd_show_fdinfo()?
On 5/3/20 7:46 AM, Al Viro wrote: > On Fri, May 01, 2020 at 05:54:09PM -0600, Jens Axboe wrote: >> On 5/1/20 5:12 PM, Al Viro wrote: >>> On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote: >>>> + flags &= EFD_SHARED_FCNTL_FLAGS; >>>> + flags |= O_RDWR; >>>> + fd = get_unused_fd_flags(flags); >>>> if (fd < 0) >>>> - eventfd_free_ctx(ctx); >>>> + goto err; >>>> + >>>> + file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); >>>> + if (IS_ERR(file)) { >>>> + put_unused_fd(fd); >>>> + fd = PTR_ERR(file); >>>> + goto err; >>>> + } >>>> >>>> + file->f_mode |= FMODE_NOWAIT; >>>> + fd_install(fd, file); >>>> + return fd; >>>> +err: >>>> + eventfd_free_ctx(ctx); >>>> return fd; >>>> } >>> >>> Looks sane... I can take it via vfs.git, or leave it for you if you >>> have other stuff in the same area... >> >> Would be great if you can queue it up in vfs.git, thanks! Don't have >> anything else that'd conflict with this. > > Applied; BTW, what happens if > ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL); > fails? Quitely succeed with BS value (-ENOSPC/-ENOMEM) shown by > eventfd_show_fdinfo()? Huh yeah that's odd, not sure how I missed that when touching code near it. Want a followup patch to fix that issue?
On 5/3/20 8:42 AM, Jens Axboe wrote: > On 5/3/20 7:46 AM, Al Viro wrote: >> On Fri, May 01, 2020 at 05:54:09PM -0600, Jens Axboe wrote: >>> On 5/1/20 5:12 PM, Al Viro wrote: >>>> On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote: >>>>> + flags &= EFD_SHARED_FCNTL_FLAGS; >>>>> + flags |= O_RDWR; >>>>> + fd = get_unused_fd_flags(flags); >>>>> if (fd < 0) >>>>> - eventfd_free_ctx(ctx); >>>>> + goto err; >>>>> + >>>>> + file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); >>>>> + if (IS_ERR(file)) { >>>>> + put_unused_fd(fd); >>>>> + fd = PTR_ERR(file); >>>>> + goto err; >>>>> + } >>>>> >>>>> + file->f_mode |= FMODE_NOWAIT; >>>>> + fd_install(fd, file); >>>>> + return fd; >>>>> +err: >>>>> + eventfd_free_ctx(ctx); >>>>> return fd; >>>>> } >>>> >>>> Looks sane... I can take it via vfs.git, or leave it for you if you >>>> have other stuff in the same area... >>> >>> Would be great if you can queue it up in vfs.git, thanks! Don't have >>> anything else that'd conflict with this. >> >> Applied; BTW, what happens if >> ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL); >> fails? Quitely succeed with BS value (-ENOSPC/-ENOMEM) shown by >> eventfd_show_fdinfo()? > > Huh yeah that's odd, not sure how I missed that when touching code > near it. Want a followup patch to fix that issue? This should do the trick. Ideally we'd change the order of these, and shove this fix into 5.7, but not sure it's super important since this bug is pretty old. Would make stable backports easier, though... Let me know how you want to handle it, as it'll impact the one you have already queued up. diff --git a/fs/eventfd.c b/fs/eventfd.c index 20f0fd4d56e1..96081efdd0c9 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -422,6 +422,10 @@ static int do_eventfd(unsigned int count, int flags) ctx->count = count; ctx->flags = flags; ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL); + if (ctx->id < 0) { + fd = ctx->id; + goto err; + } flags &= EFD_SHARED_FCNTL_FLAGS; flags |= O_RDWR;
On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote: > eventfd is using ->read() as it's file_operations read handler, but > this prevents passing in information about whether a given IO operation > is blocking or not. We can only use the file flags for that. To support > async (-EAGAIN/poll based) retries for io_uring, we need ->read_iter() > support. Convert eventfd to using ->read_iter(). > > With ->read_iter(), we can support IOCB_NOWAIT. Ensure the fd setup > is done such that we set file->f_mode with FMODE_NOWAIT. Can you add a anon_inode_getfd_mode that passes extra flags for f_mode instead of opencoding it? Especially as I expect more users that might want to handle IOCB_NOWAIT.
diff --git a/fs/eventfd.c b/fs/eventfd.c index 78e41c7c3d05..20f0fd4d56e1 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -216,32 +216,32 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w } EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); -static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct eventfd_ctx *ctx = file->private_data; - ssize_t res; __u64 ucnt = 0; DECLARE_WAITQUEUE(wait, current); - if (count < sizeof(ucnt)) + if (iov_iter_count(to) < sizeof(ucnt)) return -EINVAL; - spin_lock_irq(&ctx->wqh.lock); - res = -EAGAIN; - if (ctx->count > 0) - res = sizeof(ucnt); - else if (!(file->f_flags & O_NONBLOCK)) { + if (!ctx->count) { + if ((file->f_flags & O_NONBLOCK) || + (iocb->ki_flags & IOCB_NOWAIT)) { + spin_unlock_irq(&ctx->wqh.lock); + return -EAGAIN; + } __add_wait_queue(&ctx->wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - if (ctx->count > 0) { - res = sizeof(ucnt); + if (ctx->count) break; - } if (signal_pending(current)) { - res = -ERESTARTSYS; - break; + __remove_wait_queue(&ctx->wqh, &wait); + __set_current_state(TASK_RUNNING); + spin_unlock_irq(&ctx->wqh.lock); + return -ERESTARTSYS; } spin_unlock_irq(&ctx->wqh.lock); schedule(); @@ -250,17 +250,14 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (likely(res > 0)) { - eventfd_ctx_do_read(ctx, &ucnt); - if (waitqueue_active(&ctx->wqh)) - wake_up_locked_poll(&ctx->wqh, EPOLLOUT); - } + eventfd_ctx_do_read(ctx, &ucnt); + if (waitqueue_active(&ctx->wqh)) + wake_up_locked_poll(&ctx->wqh, EPOLLOUT); spin_unlock_irq(&ctx->wqh.lock); - - if (res > 0 && put_user(ucnt, (__u64 __user *)buf)) + if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt))) return -EFAULT; - return res; + return sizeof(ucnt); } static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, @@ -329,7 +326,7 @@ static const struct file_operations eventfd_fops = { #endif .release = eventfd_release, .poll = eventfd_poll, - .read = eventfd_read, + .read_iter = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, }; @@ -406,6 +403,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); static int do_eventfd(unsigned int count, int flags) { struct eventfd_ctx *ctx; + struct file *file; int fd; /* Check the EFD_* constants for consistency. */ @@ -425,11 +423,24 @@ static int do_eventfd(unsigned int count, int flags) ctx->flags = flags; ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL); - fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, - O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS)); + flags &= EFD_SHARED_FCNTL_FLAGS; + flags |= O_RDWR; + fd = get_unused_fd_flags(flags); if (fd < 0) - eventfd_free_ctx(ctx); + goto err; + + file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); + if (IS_ERR(file)) { + put_unused_fd(fd); + fd = PTR_ERR(file); + goto err; + } + file->f_mode |= FMODE_NOWAIT; + fd_install(fd, file); + return fd; +err: + eventfd_free_ctx(ctx); return fd; }
eventfd is using ->read() as it's file_operations read handler, but this prevents passing in information about whether a given IO operation is blocking or not. We can only use the file flags for that. To support async (-EAGAIN/poll based) retries for io_uring, we need ->read_iter() support. Convert eventfd to using ->read_iter(). With ->read_iter(), we can support IOCB_NOWAIT. Ensure the fd setup is done such that we set file->f_mode with FMODE_NOWAIT. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- Since v3: - Ensure we fiddle ->f_mode before doing fd_install() on the fd Since v2: - Cleanup eventfd_read() as per Al's suggestions Since v1: - Add FMODE_NOWAIT to the eventfd file