@@ -262,20 +262,24 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait)
return events;
}
-static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
- loff_t *ppos)
+static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ struct file *file = iocb->ki_filp;
struct timerfd_ctx *ctx = file->private_data;
ssize_t res;
u64 ticks = 0;
- if (count < sizeof(ticks))
+ if (iov_iter_count(to) < sizeof(ticks))
return -EINVAL;
+
spin_lock_irq(&ctx->wqh.lock);
- if (file->f_flags & O_NONBLOCK)
- res = -EAGAIN;
- else
- res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
+ if (!ctx->ticks) {
+ if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT)
+ res = -EAGAIN;
+ else
+ res = wait_event_interruptible_locked_irq(ctx->wqh,
+ ctx->ticks);
+ }
/*
* If clock has changed, we do not care about the
@@ -313,7 +317,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
}
spin_unlock_irq(&ctx->wqh.lock);
if (ticks)
- res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
+ res = copy_to_iter(&ticks, sizeof(ticks), to);
return res;
}
@@ -384,7 +388,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
.poll = timerfd_poll,
- .read = timerfd_read,
+ .read_iter = timerfd_read_iter,
.llseek = noop_llseek,
.show_fdinfo = timerfd_show,
.unlocked_ioctl = timerfd_ioctl,
@@ -407,6 +411,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
int ufd;
struct timerfd_ctx *ctx;
+ struct file *file;
/* Check the TFD_* constants for consistency. */
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -443,11 +448,22 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
ctx->moffs = ktime_mono_to_real(0);
- ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
- O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
- if (ufd < 0)
+ ufd = get_unused_fd_flags(O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
+ if (ufd < 0) {
kfree(ctx);
+ return ufd;
+ }
+
+ file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
+ O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
+ if (IS_ERR(file)) {
+ put_unused_fd(ufd);
+ kfree(ctx);
+ return PTR_ERR(file);
+ }
+ file->f_mode |= FMODE_NOWAIT;
+ fd_install(ufd, file);
return ufd;
}
timerfd is utterly buggy wrt nonblocking reads - regardless of whether or not there's data available, it returns -EAGAIN. This is incompatible with how nonblocking reads should work. If there's data available, it should be returned. Convert it to use fops->read_iter() so it can handle both nonblocking fds and IOCB_NOWAIT, mark it as FMODE_NOWAIT to signify that it's compatible with nonblocking reads, and finally have timerfd_read_iter() properly check for data availability in nonblocking mode. Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe <axboe@kernel.dk> --- Can't believe it's this broken... Patch has been tested with a test case that was reported via io_uring, and I also ran the ltp timerfd test cases and it passes all of those too.