@@ -70,6 +70,8 @@
#include <linux/sizes.h>
#include <linux/hugetlb.h>
#include <linux/highmem.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -322,6 +324,10 @@ struct io_async_rw {
ssize_t size;
};
+/*
+ * Deferred state for IORING_OP_OPENAT: the pathname resolved from user
+ * memory at prep time, so the async worker can retry the open without
+ * touching userspace. Freed via putname() in __io_free_req().
+ */
+struct io_async_open {
+	struct filename *filename;
+};
+
struct io_async_ctx {
struct io_uring_sqe sqe;
union {
@@ -329,6 +335,7 @@ struct io_async_ctx {
struct io_async_msghdr msg;
struct io_async_connect connect;
struct io_timeout_data timeout;
+ struct io_async_open open;
};
};
@@ -879,8 +886,11 @@ static void __io_free_req(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- if (req->io)
+ if (req->io) {
+ if (req->io->sqe.opcode == IORING_OP_OPENAT)
+ putname(req->io->open.filename);
kfree(req->io);
+ }
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
fput(req->file);
if (req->flags & REQ_F_INFLIGHT) {
@@ -2001,6 +2011,88 @@ static int io_fallocate(struct io_kiocb *req, struct io_kiocb **nxt,
return 0;
}
+/*
+ * Prepare IORING_OP_OPENAT for deferred execution: copy the user
+ * pathname into a kernel struct filename now, so the async worker
+ * never has to dereference userspace memory.  On failure the slot is
+ * cleared so teardown does not putname() an ERR_PTR.
+ */
+static int io_openat_prep(struct io_kiocb *req, struct io_async_ctx *io)
+{
+	const char __user *fname;
+	struct filename *name;
+
+	fname = u64_to_user_ptr(READ_ONCE(req->sqe->addr));
+	name = getname(fname);
+	if (IS_ERR(name)) {
+		io->open.filename = NULL;
+		return PTR_ERR(name);
+	}
+
+	io->open.filename = name;
+	return 0;
+}
+
+/*
+ * IORING_OP_OPENAT: async openat(2).  sqe->fd is the directory fd,
+ * sqe->addr the userspace pathname, sqe->len the mode, and
+ * sqe->open_flags the open(2) flags.
+ *
+ * With force_nonblock the lookup runs with LOOKUP_NONBLOCK; if it
+ * cannot complete inline, the filename is stashed in req->io and the
+ * request is punted to the async worker, which retries blocking.
+ *
+ * Ownership rule: once filename is stored in req->io, __io_free_req()
+ * is responsible for the final putname(); otherwise we put it here.
+ */
+static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
+		     bool force_nonblock)
+{
+	const struct io_uring_sqe *sqe = req->sqe;
+	struct filename *filename;
+	const char __user *fname;
+	struct open_flags op;
+	int flags, ret, dfd;
+	struct file *file;
+	umode_t mode;
+
+	if (sqe->ioprio || sqe->buf_index)
+		return -EINVAL;
+
+	dfd = READ_ONCE(sqe->fd);
+	mode = READ_ONCE(sqe->len);
+	fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	flags = READ_ONCE(sqe->open_flags);
+
+	ret = build_open_flags(flags, mode, &op);
+	if (ret)
+		goto err;
+	if (force_nonblock)
+		op.lookup_flags |= LOOKUP_NONBLOCK;
+	if (req->io) {
+		/* deferred/retried: pathname was resolved at prep time */
+		filename = req->io->open.filename;
+	} else {
+		filename = getname(fname);
+		if (IS_ERR(filename)) {
+			ret = PTR_ERR(filename);
+			goto err;
+		}
+	}
+
+	ret = get_unused_fd_flags(flags);
+	if (ret < 0)
+		goto err_name;
+
+	file = do_filp_open(dfd, filename, &op);
+	if (IS_ERR(file)) {
+		put_unused_fd(ret);
+		ret = PTR_ERR(file);
+		if (ret == -EAGAIN) {
+			/*
+			 * Punt to the worker.  Transfer filename ownership
+			 * to req->io if it isn't there already (don't leak
+			 * an existing req->io on a repeat -EAGAIN), and
+			 * copy the sqe so __io_free_req()'s opcode check
+			 * reads initialized data.
+			 */
+			if (!req->io) {
+				req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
+				if (!req->io) {
+					ret = -ENOMEM;
+					goto err_name;
+				}
+				memcpy(&req->io->sqe, sqe, sizeof(*sqe));
+				req->io->open.filename = filename;
+			}
+			req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+			return -EAGAIN;
+		}
+	} else {
+		fsnotify_open(file);
+		fd_install(ret, file);
+	}
+err_name:
+	/*
+	 * If req->io owns the filename, __io_free_req() drops the
+	 * reference on teardown; putting it here too would be a
+	 * double putname().
+	 */
+	if (!req->io)
+		putname(filename);
+err:
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_cqring_add_event(req, ret);
+	io_put_req_find_next(req, nxt);
+	return 0;
+}
+
static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -2909,6 +3001,9 @@ static int io_req_defer_prep(struct io_kiocb *req, struct io_async_ctx *io)
return io_timeout_prep(req, io, false);
case IORING_OP_LINK_TIMEOUT:
return io_timeout_prep(req, io, true);
+ case IORING_OP_OPENAT:
+ ret = io_openat_prep(req, io);
+ break;
default:
req->io = io;
return 0;
@@ -3018,6 +3113,9 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
case IORING_OP_FALLOCATE:
ret = io_fallocate(req, nxt, force_nonblock);
break;
+ case IORING_OP_OPENAT:
+ ret = io_openat(req, nxt, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
@@ -3102,7 +3200,7 @@ static bool io_req_op_valid(int op)
return op >= IORING_OP_NOP && op < IORING_OP_LAST;
}
-static int io_op_needs_file(const struct io_uring_sqe *sqe)
+static int io_op_needs_file(const struct io_uring_sqe *sqe, int fd)
{
int op = READ_ONCE(sqe->opcode);
@@ -3114,6 +3212,8 @@ static int io_op_needs_file(const struct io_uring_sqe *sqe)
case IORING_OP_ASYNC_CANCEL:
case IORING_OP_LINK_TIMEOUT:
return 0;
+ case IORING_OP_OPENAT:
+ return fd != -1;
default:
if (io_req_op_valid(op))
return 1;
@@ -3142,7 +3242,7 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req)
if (flags & IOSQE_IO_DRAIN)
req->flags |= REQ_F_IO_DRAIN;
- ret = io_op_needs_file(req->sqe);
+ ret = io_op_needs_file(req->sqe, fd);
if (ret <= 0)
return ret;
@@ -34,6 +34,7 @@ struct io_uring_sqe {
__u32 timeout_flags;
__u32 accept_flags;
__u32 cancel_flags;
+ __u32 open_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
@@ -77,6 +78,7 @@ enum {
IORING_OP_LINK_TIMEOUT,
IORING_OP_CONNECT,
IORING_OP_FALLOCATE,
+ IORING_OP_OPENAT,
/* this goes last, obviously */
IORING_OP_LAST,
This works just like openat(2), except it can be performed async. For
the normal case of a non-blocking path lookup this will complete
inline. If we have to do IO to perform the open, it'll be done from
async context.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 106 +++++++++++++++++++++++++++++++++-
 include/uapi/linux/io_uring.h |   2 +
 2 files changed, 105 insertions(+), 3 deletions(-)