
[v2] eventfd: convert to f_op->read_iter()

Message ID c71b2091-a86e-cc81-056d-de2f1e839f50@kernel.dk
State New, archived
Series [v2] eventfd: convert to f_op->read_iter()

Commit Message

Jens Axboe May 1, 2020, 5:18 p.m. UTC
eventfd is using ->read() as its file_operations read handler, but
this prevents passing in information about whether a given IO operation
is blocking or not. We can only use the file flags for that. To support
async (-EAGAIN/poll based) retries for io_uring, we need ->read_iter()
support. Convert eventfd to using ->read_iter().
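
As an illustrative sketch only (not part of the patch, and the function
name is hypothetical): a ->read_iter() handler sees both the file-wide
O_NONBLOCK flag and the per-call IOCB_NOWAIT hint in the kiocb, whereas a
plain ->read() handler only ever sees the former:

	static ssize_t example_read_iter(struct kiocb *iocb, struct iov_iter *to)
	{
		struct file *file = iocb->ki_filp;
		bool nonblock;

		/*
		 * Per-IO hint from the kiocb (e.g. an io_uring nonblocking
		 * issue attempt) or the file-wide flag set at open()/fcntl()
		 * time; ->read() could only ever see the latter.
		 */
		nonblock = (iocb->ki_flags & IOCB_NOWAIT) ||
			   (file->f_flags & O_NONBLOCK);

		/* ... return -EAGAIN instead of sleeping when nonblock ... */
		return 0;
	}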

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

Since v1:

- Add FMODE_NOWAIT to the eventfd file
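
For context, and only as an assumption about the surrounding generic code
rather than part of this patch: FMODE_NOWAIT is what gates per-IO
nonblocking requests, e.g. the RWF_NOWAIT handling in kiocb_set_rw_flags()
is roughly:

	if (flags & RWF_NOWAIT) {
		if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
			return -EOPNOTSUPP;
		kiocb_flags |= IOCB_NOWAIT;
	}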

Comments

Al Viro May 1, 2020, 5:43 p.m. UTC | #1
On Fri, May 01, 2020 at 11:18:05AM -0600, Jens Axboe wrote:

> -	if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
> +	if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)

*whoa*

It is correct, but only because here res > 0 <=> res == 8.
And that's not trivial at first glance.

Please, turn that into something like

	if (iov_iter_count(to) < sizeof(ucnt))
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	if (!ctx->count) {
		if (unlikely(file->f_flags & O_NONBLOCK)) {
			spin_unlock_irq(&ctx->wqh.lock);
			return -EAGAIN;
		}
		__add_wait_queue(&ctx->wqh, &wait);
		for (;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->count)
				break;
			if (signal_pending(current)) {
				spin_unlock_irq(&ctx->wqh.lock);
				return -ERESTARTSYS;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	eventfd_ctx_do_read(ctx, &ucnt);
	if (waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
	spin_unlock_irq(&ctx->wqh.lock);
	if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
		return -EFAULT;
	return sizeof(ucnt);
Jens Axboe May 1, 2020, 5:49 p.m. UTC | #2
On 5/1/20 11:43 AM, Al Viro wrote:
> On Fri, May 01, 2020 at 11:18:05AM -0600, Jens Axboe wrote:
> 
>> -	if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
>> +	if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)
> 
> *whoa*
> 
> It is correct, but only because here res > 0 <=> res == 8.
> And that's not trivial at first glance.
> 
> Please, turn that into something like

Looks good to me, just one minor edit:

> 	if (iov_iter_count(to) < sizeof(ucnt))
> 		return -EINVAL;
> 	spin_lock_irq(&ctx->wqh.lock);
> 	if (!ctx->count) {
> 		if (unlikely(file->f_flags & O_NONBLOCK)) {
> 			spin_unlock_irq(&ctx->wqh.lock);
> 			return -EAGAIN;
> 		}
> 		__add_wait_queue(&ctx->wqh, &wait);
> 		for (;;) {
> 			set_current_state(TASK_INTERRUPTIBLE);
> 			if (ctx->count)
> 				break;
> 			if (signal_pending(current)) {
> 				spin_unlock_irq(&ctx->wqh.lock);
> 				return -ERESTARTSYS;
> 			}

We need to remove ourselves from the waitqueue and reset the task state
here before returning -ERESTARTSYS. I'll run a sanity check on that and
send out a v3.
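
For illustration, a minimal sketch of that corrected branch (an assumption,
the actual v3 may differ): each early return undoes the wait queue addition
and the task state change before dropping the lock:

			if (signal_pending(current)) {
				__remove_wait_queue(&ctx->wqh, &wait);
				__set_current_state(TASK_RUNNING);
				spin_unlock_irq(&ctx->wqh.lock);
				return -ERESTARTSYS;
			}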

Patch

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 78e41c7c3d05..d590c2141d39 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -216,10 +216,11 @@  int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
 }
 EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *iov)
 {
+	struct file *file = iocb->ki_filp;
 	struct eventfd_ctx *ctx = file->private_data;
+	size_t count = iov_iter_count(iov);
 	ssize_t res;
 	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
@@ -231,7 +232,8 @@  static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 	res = -EAGAIN;
 	if (ctx->count > 0)
 		res = sizeof(ucnt);
-	else if (!(file->f_flags & O_NONBLOCK)) {
+	else if (!(file->f_flags & O_NONBLOCK) &&
+		 !(iocb->ki_flags & IOCB_NOWAIT)) {
 		__add_wait_queue(&ctx->wqh, &wait);
 		for (;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -257,7 +259,7 @@  static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
 
-	if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
+	if (res > 0 && copy_to_iter(&ucnt, res, iov) < res)
 		return -EFAULT;
 
 	return res;
@@ -329,7 +331,7 @@  static const struct file_operations eventfd_fops = {
 #endif
 	.release	= eventfd_release,
 	.poll		= eventfd_poll,
-	.read		= eventfd_read,
+	.read_iter	= eventfd_read,
 	.write		= eventfd_write,
 	.llseek		= noop_llseek,
 };
@@ -427,8 +429,17 @@  static int do_eventfd(unsigned int count, int flags)
 
 	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
 			      O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
-	if (fd < 0)
+	if (fd < 0) {
 		eventfd_free_ctx(ctx);
+	} else {
+		struct file *file;
+
+		file = fget(fd);
+		if (file) {
+			file->f_mode |= FMODE_NOWAIT;
+			fput(file);
+		}
+	}
 
 	return fd;
 }
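
As a hypothetical userspace illustration (assuming the behavior of the
patch above, not something from this thread): once eventfd advertises
FMODE_NOWAIT and implements ->read_iter(), a per-call nonblocking read can
be requested with preadv2() and RWF_NOWAIT, independent of O_NONBLOCK:

	#define _GNU_SOURCE
	#include <sys/eventfd.h>
	#include <sys/uio.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t val = 0;
		struct iovec iov = { .iov_base = &val, .iov_len = sizeof(val) };
		int efd = eventfd(0, 0);	/* counter is 0, fd opened blocking */
		ssize_t ret;

		if (efd < 0)
			return 1;

		/*
		 * Per-call nonblocking read: with FMODE_NOWAIT set and
		 * ->read_iter() honoring IOCB_NOWAIT, this should fail with
		 * EAGAIN instead of sleeping on the empty counter.
		 */
		ret = preadv2(efd, &iov, 1, -1, RWF_NOWAIT);
		if (ret < 0)
			perror("preadv2");
		else
			printf("read %zd bytes, count %llu\n",
			       ret, (unsigned long long)val);
		return 0;
	}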