diff mbox series

[v10,5/5] io_uring: add fgetxattr and getxattr support

Message ID 20211229203002.4110839-6-shr@fb.com (mailing list archive)
State New, archived
Headers show
Series io_uring: add xattr support | expand

Commit Message

Stefan Roesch Dec. 29, 2021, 8:30 p.m. UTC
This adds support to io_uring for the fgetxattr and getxattr API.

Signed-off-by: Stefan Roesch <shr@fb.com>
---
 fs/io_uring.c                 | 152 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |   2 +
 2 files changed, 154 insertions(+)

Comments

Al Viro Dec. 30, 2021, 1:41 a.m. UTC | #1
On Wed, Dec 29, 2021 at 12:30:02PM -0800, Stefan Roesch wrote:

> +static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +	struct io_xattr *ix = &req->xattr;
> +	unsigned int lookup_flags = LOOKUP_FOLLOW;
> +	struct path path;
> +	int ret;
> +
> +	if (issue_flags & IO_URING_F_NONBLOCK)
> +		return -EAGAIN;
> +
> +retry:
> +	ret = do_user_path_at_empty(AT_FDCWD, ix->filename, lookup_flags, &path);
> +	if (!ret) {
> +		ret = do_getxattr(mnt_user_ns(path.mnt),
> +				path.dentry,
> +				ix->ctx.kname->name,
> +				(void __user *)ix->ctx.value,
> +				ix->ctx.size);
> +
> +		path_put(&path);
> +		if (retry_estale(ret, lookup_flags)) {
> +			lookup_flags |= LOOKUP_REVAL;
> +			goto retry;
> +		}
> +	}
> +	putname(ix->filename);
> +
> +	__io_getxattr_finish(req, ret);
> +	return 0;
> +}

Looking at that one...  Is there any reason to have that loop (from retry: to
putname() call) outside of fs/xattr.c?  Come to think of that, why bother
polluting your struct io_xattr with ->filename?

Note, BTW, that we already have this:
static ssize_t path_getxattr(const char __user *pathname,
                             const char __user *name, void __user *value,
			     size_t size, unsigned int lookup_flags)
{
	struct path path;
	ssize_t error;
retry:
	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
	if (error)
		return error;
	error = getxattr(mnt_user_ns(path.mnt), path.dentry, name, value, size);
	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
in there.  The only potential benefit here would be to avoid repeated getname
in case of having hit -ESTALE and going to repeat the entire fucking pathwalk
with maximal paranoia, asking the server(s) involved to revalidate on every
step, etc.

If we end up going there, who the hell *cares* about the costs of less than
a page worth of copy_from_user()?  We are already on a very slow path as it
is, so what's the point?
Al Viro Dec. 30, 2021, 1:46 a.m. UTC | #2
On Thu, Dec 30, 2021 at 01:41:35AM +0000, Al Viro wrote:
> On Wed, Dec 29, 2021 at 12:30:02PM -0800, Stefan Roesch wrote:
> 
> > +static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
> > +{
> > +	struct io_xattr *ix = &req->xattr;
> > +	unsigned int lookup_flags = LOOKUP_FOLLOW;
> > +	struct path path;
> > +	int ret;
> > +
> > +	if (issue_flags & IO_URING_F_NONBLOCK)
> > +		return -EAGAIN;
> > +
> > +retry:
> > +	ret = do_user_path_at_empty(AT_FDCWD, ix->filename, lookup_flags, &path);
> > +	if (!ret) {
> > +		ret = do_getxattr(mnt_user_ns(path.mnt),
> > +				path.dentry,
> > +				ix->ctx.kname->name,
> > +				(void __user *)ix->ctx.value,
> > +				ix->ctx.size);
> > +
> > +		path_put(&path);
> > +		if (retry_estale(ret, lookup_flags)) {
> > +			lookup_flags |= LOOKUP_REVAL;
> > +			goto retry;
> > +		}
> > +	}
> > +	putname(ix->filename);
> > +
> > +	__io_getxattr_finish(req, ret);
> > +	return 0;
> > +}
> 
> Looking at that one...  Is there any reason to have that loop (from retry: to
> putname() call) outside of fs/xattr.c?  Come to think of that, why bother
> polluting your struct io_xattr with ->filename?
> 
> Note, BTW, that we already have this:
> static ssize_t path_getxattr(const char __user *pathname,
>                              const char __user *name, void __user *value,
> 			     size_t size, unsigned int lookup_flags)
> {
> 	struct path path;
> 	ssize_t error;
> retry:
> 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
> 	if (error)
> 		return error;
> 	error = getxattr(mnt_user_ns(path.mnt), path.dentry, name, value, size);
> 	path_put(&path);
> 	if (retry_estale(error, lookup_flags)) {
> 		lookup_flags |= LOOKUP_REVAL;
> 		goto retry;
> 	}
> 	return error;
> }
> in there.  The only potential benefit here would be to avoid repeated getname
> in case of having hit -ESTALE and going to repeat the entire fucking pathwalk
> with maximal paranoia, asking the server(s) involved to revalidate on every
> step, etc.
> 
> If we end up going there, who the hell *cares* about the costs of less than
> a page worth of copy_from_user()?  We are already on a very slow path as it
> is, so what's the point?

BTW, if the answer is along the lines of "we want to copy the name in prep
phase fo $REASONS", I would like to hear what it is that makes getxattr()
different from statx() in that respect.
Stefan Roesch Dec. 30, 2021, 8:01 p.m. UTC | #3
On 12/29/21 5:41 PM, Al Viro wrote:
> On Wed, Dec 29, 2021 at 12:30:02PM -0800, Stefan Roesch wrote:
> 
>> +static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
>> +{
>> +	struct io_xattr *ix = &req->xattr;
>> +	unsigned int lookup_flags = LOOKUP_FOLLOW;
>> +	struct path path;
>> +	int ret;
>> +
>> +	if (issue_flags & IO_URING_F_NONBLOCK)
>> +		return -EAGAIN;
>> +
>> +retry:
>> +	ret = do_user_path_at_empty(AT_FDCWD, ix->filename, lookup_flags, &path);
>> +	if (!ret) {
>> +		ret = do_getxattr(mnt_user_ns(path.mnt),
>> +				path.dentry,
>> +				ix->ctx.kname->name,
>> +				(void __user *)ix->ctx.value,
>> +				ix->ctx.size);
>> +
>> +		path_put(&path);
>> +		if (retry_estale(ret, lookup_flags)) {
>> +			lookup_flags |= LOOKUP_REVAL;
>> +			goto retry;
>> +		}
>> +	}
>> +	putname(ix->filename);
>> +
>> +	__io_getxattr_finish(req, ret);
>> +	return 0;
>> +}
> 
> Looking at that one...  Is there any reason to have that loop (from retry: to
> putname() call) outside of fs/xattr.c?  Come to think of that, why bother
> polluting your struct io_xattr with ->filename?
> 
> Note, BTW, that we already have this:
> static ssize_t path_getxattr(const char __user *pathname,
>                              const char __user *name, void __user *value,
> 			     size_t size, unsigned int lookup_flags)
> {
> 	struct path path;
> 	ssize_t error;
> retry:
> 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
> 	if (error)
> 		return error;
> 	error = getxattr(mnt_user_ns(path.mnt), path.dentry, name, value, size);
> 	path_put(&path);
> 	if (retry_estale(error, lookup_flags)) {
> 		lookup_flags |= LOOKUP_REVAL;
> 		goto retry;
> 	}
> 	return error;
> }
> in there.  The only potential benefit here would be to avoid repeated getname
> in case of having hit -ESTALE and going to repeat the entire fucking pathwalk
> with maximal paranoia, asking the server(s) involved to revalidate on every
> step, etc.
> 
> If we end up going there, who the hell *cares* about the costs of less than
> a page worth of copy_from_user()?  We are already on a very slow path as it
> is, so what's the point?

I think Jens already answered this why we capture the parameters during the prep
step. From Jens:

"

- The prep of it, this happens inline from the system call where the
  request, or requests, are submitted. The prep phase should ensure that
  argument structs are stable. Hence a caller can prep a request and
  have memory on stack, as long as it submits before it becomes invalid.
  An example of that are iovecs for readv/writev. The caller does not
  need to have them stable for the duration of the request, just across
  submit. That's the io_${cmd}_prep() helpers.

- The execution of it. May be separate from prep and from an async
  worker. Where the lower layers don't support a nonblocking attempt,
  they are always done async. The statx stuff is an example of that.

Hence prep needs to copy from userland on the prep side always for the
statx family, as execution will happen out-of-line from the submission.

"

Otherwise we need to copy the path value the user passed in, storing a filename struct
seems to be the better choice.
diff mbox series

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f1b325dd81d5..4f2ffe43cb1d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1130,6 +1130,10 @@  static const struct io_op_def io_op_defs[] = {
 		.needs_file = 1
 	},
 	[IORING_OP_SETXATTR] = {},
+	[IORING_OP_FGETXATTR] = {
+		.needs_file = 1
+	},
+	[IORING_OP_GETXATTR] = {},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -3899,6 +3903,137 @@  static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+static int __io_getxattr_prep(struct io_kiocb *req,
+			      const struct io_uring_sqe *sqe)
+{
+	struct io_xattr *ix = &req->xattr;
+	const char __user *name;
+	int ret;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (unlikely(sqe->ioprio))
+		return -EINVAL;
+	if (unlikely(req->flags & REQ_F_FIXED_FILE))
+		return -EBADF;
+
+	ix->filename = NULL;
+	ix->ctx.kvalue = NULL;
+	name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	ix->ctx.value = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+	ix->ctx.size = READ_ONCE(sqe->len);
+	ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+	if (ix->ctx.flags)
+		return -EINVAL;
+
+	ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+	if (!ix->ctx.kname)
+		return -ENOMEM;
+
+	ret = strncpy_from_user(ix->ctx.kname->name, name,
+				sizeof(ix->ctx.kname->name));
+	if (!ret || ret == sizeof(ix->ctx.kname->name))
+		ret = -ERANGE;
+	if (ret < 0) {
+		kfree(ix->ctx.kname);
+		return ret;
+	}
+
+	req->flags |= REQ_F_NEED_CLEANUP;
+	return 0;
+}
+
+static int io_fgetxattr_prep(struct io_kiocb *req,
+			     const struct io_uring_sqe *sqe)
+{
+	return __io_getxattr_prep(req, sqe);
+}
+
+static int io_getxattr_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	struct io_xattr *ix = &req->xattr;
+	const char __user *path;
+	int ret;
+
+	ret = __io_getxattr_prep(req, sqe);
+	if (ret)
+		return ret;
+
+	path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+	ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+	if (IS_ERR(ix->filename)) {
+		ret = PTR_ERR(ix->filename);
+		ix->filename = NULL;
+	}
+
+	return ret;
+}
+
+static void __io_getxattr_finish(struct io_kiocb *req, int ret)
+{
+	struct xattr_ctx *ctx = &req->xattr.ctx;
+
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+
+	kfree(ctx->kname);
+	if (ret < 0)
+		req_set_fail(req);
+
+	io_req_complete(req, ret);
+}
+
+static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_xattr *ix = &req->xattr;
+	int ret;
+
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		return -EAGAIN;
+
+	ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt),
+			req->file->f_path.dentry,
+			ix->ctx.kname->name,
+			(void __user *)ix->ctx.value,
+			ix->ctx.size);
+
+	__io_getxattr_finish(req, ret);
+	return 0;
+}
+
+static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_xattr *ix = &req->xattr;
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
+	struct path path;
+	int ret;
+
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		return -EAGAIN;
+
+retry:
+	ret = do_user_path_at_empty(AT_FDCWD, ix->filename, lookup_flags, &path);
+	if (!ret) {
+		ret = do_getxattr(mnt_user_ns(path.mnt),
+				path.dentry,
+				ix->ctx.kname->name,
+				(void __user *)ix->ctx.value,
+				ix->ctx.size);
+
+		path_put(&path);
+		if (retry_estale(ret, lookup_flags)) {
+			lookup_flags |= LOOKUP_REVAL;
+			goto retry;
+		}
+	}
+	putname(ix->filename);
+
+	__io_getxattr_finish(req, ret);
+	return 0;
+}
+
 static int __io_setxattr_prep(struct io_kiocb *req,
 			const struct io_uring_sqe *sqe)
 {
@@ -6772,6 +6907,10 @@  static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_fsetxattr_prep(req, sqe);
 	case IORING_OP_SETXATTR:
 		return io_setxattr_prep(req, sqe);
+	case IORING_OP_FGETXATTR:
+		return io_fgetxattr_prep(req, sqe);
+	case IORING_OP_GETXATTR:
+		return io_getxattr_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6921,6 +7060,13 @@  static void io_clean_op(struct io_kiocb *req)
 			kfree(req->xattr.ctx.kname);
 			kvfree(req->xattr.ctx.kvalue);
 			break;
+		case IORING_OP_GETXATTR:
+			if (req->xattr.filename)
+				putname(req->xattr.filename);
+			fallthrough;
+		case IORING_OP_FGETXATTR:
+			kfree(req->xattr.ctx.kname);
+			break;
 		}
 	}
 	if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -7072,6 +7218,12 @@  static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_SETXATTR:
 		ret = io_setxattr(req, issue_flags);
 		break;
+	case IORING_OP_FGETXATTR:
+		ret = io_fgetxattr(req, issue_flags);
+		break;
+	case IORING_OP_GETXATTR:
+		ret = io_getxattr(req, issue_flags);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index c62a8bec8cd4..efc7ac9b3a6b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -148,6 +148,8 @@  enum {
 	IORING_OP_GETDENTS,
 	IORING_OP_FSETXATTR,
 	IORING_OP_SETXATTR,
+	IORING_OP_FGETXATTR,
+	IORING_OP_GETXATTR,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,