diff mbox series

[v6,4/5] io_uring: add fsetxattr and setxattr support

Message ID 20211222210127.958902-5-shr@fb.com (mailing list archive)
State New, archived
Headers show
Series io_uring: add xattr support | expand

Commit Message

Stefan Roesch Dec. 22, 2021, 9:01 p.m. UTC
This adds support to io_uring for the fsetxattr and setxattr API.

Signed-off-by: Stefan Roesch <shr@fb.com>
---
 fs/io_uring.c                 | 170 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |   6 +-
 2 files changed, 175 insertions(+), 1 deletion(-)

Comments

Christian Brauner Dec. 23, 2021, 2:52 p.m. UTC | #1
On Wed, Dec 22, 2021 at 01:01:26PM -0800, Stefan Roesch wrote:
> This adds support to io_uring for the fsetxattr and setxattr API.
> 
> Signed-off-by: Stefan Roesch <shr@fb.com>
> ---
>  fs/io_uring.c                 | 170 ++++++++++++++++++++++++++++++++++
>  include/uapi/linux/io_uring.h |   6 +-
>  2 files changed, 175 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index c8258c784116..8b6c70d6cacc 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -82,6 +82,7 @@
>  #include <linux/audit.h>
>  #include <linux/security.h>
>  #include <linux/atomic-ref.h>
> +#include <linux/xattr.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/io_uring.h>
> @@ -726,6 +727,13 @@ struct io_async_rw {
>  	struct wait_page_queue		wpq;
>  };
>  
> +struct io_xattr {
> +	struct file			*file;
> +	struct xattr_ctx		ctx;
> +	void				*value;
> +	struct filename			*filename;
> +};
> +
>  enum {
>  	REQ_F_FIXED_FILE_BIT	= IOSQE_FIXED_FILE_BIT,
>  	REQ_F_IO_DRAIN_BIT	= IOSQE_IO_DRAIN_BIT,
> @@ -866,6 +874,7 @@ struct io_kiocb {
>  		struct io_symlink	symlink;
>  		struct io_hardlink	hardlink;
>  		struct io_getdents	getdents;
> +		struct io_xattr		xattr;
>  	};
>  
>  	u8				opcode;
> @@ -1118,6 +1127,10 @@ static const struct io_op_def io_op_defs[] = {
>  	[IORING_OP_GETDENTS] = {
>  		.needs_file		= 1,
>  	},
> +	[IORING_OP_FSETXATTR] = {
> +		.needs_file = 1
> +	},
> +	[IORING_OP_SETXATTR] = {},
>  };
>  
>  /* requests with any of those set should undergo io_disarm_next() */
> @@ -3887,6 +3900,144 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
>  	return 0;
>  }
>  
> +static int __io_setxattr_prep(struct io_kiocb *req,
> +			const struct io_uring_sqe *sqe,
> +			struct user_namespace *user_ns)
> +{
> +	struct io_xattr *ix = &req->xattr;
> +	const char __user *name;
> +	void *ret;
> +
> +	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
> +		return -EINVAL;
> +	if (unlikely(sqe->ioprio))
> +		return -EINVAL;
> +	if (unlikely(req->flags & REQ_F_FIXED_FILE))
> +		return -EBADF;
> +
> +	ix->filename = NULL;
> +	name = u64_to_user_ptr(READ_ONCE(sqe->addr));
> +	ix->ctx.value = u64_to_user_ptr(READ_ONCE(sqe->addr2));
> +	ix->ctx.size = READ_ONCE(sqe->len);
> +	ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
> +
> +	ix->ctx.kname = kmalloc(XATTR_NAME_MAX + 1, GFP_KERNEL);
> +	if (!ix->ctx.kname)
> +		return -ENOMEM;
> +	ix->ctx.kname_sz = XATTR_NAME_MAX + 1;
> +
> +	ret = setxattr_setup(user_ns, name, &ix->ctx);

Looking at this a bit closer, the setxattr_setup() function converts the
vfs caps prior to vfs_setxattr(). That shouldn't be done there though.
The conversion should be done when mnt_want_write() is held in
__io_setxattr() exactly how we do for setxattr()-based calls in
fs/xattr.c. This will guard against changes of relevant mount properties
(current or future). It will also allow you to simplify your
setxattr_setup() function a bit and you don't need to retrieve the
mount's idmapping until __io_setxattr().

Right now you're splitting updating the xattrs over the prep and commit
stage and I worry that in fully async contexts this is easy to miss. So
I'd rather do it in one place. Since we can't move it all into
vfs_setxattr() similar to what we did for fscaps because it's used in a
bunch of contexts where the conversion isn't wanted we should simply
expose do_setxattr() similar to do_getxattr() you're adding.

So on top of your current patchset I'd suggest you do something like the
following (completely untested):

From 6bcd3efc3293bb91599ee73272262ac596ab4608 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Thu, 23 Dec 2021 15:23:14 +0100
Subject: [PATCH] UNTESTED

---
 fs/internal.h |  8 +++++---
 fs/io_uring.c | 21 +++++++++-----------
 fs/xattr.c    | 55 ++++++++++++++++++++++++++++++++++-----------------
 3 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index ea0433799dbc..08259fa98b2e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -222,6 +222,8 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns,
 		    void __user *value,
 		    size_t size);
 
-void *setxattr_setup(struct user_namespace *mnt_userns,
-		     const char __user *name,
-		     struct xattr_ctx *ctx);
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+		struct xattr_ctx *ctx, void *xattr_val);
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx,
+		  void **xattr_val);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5dd01f19d915..c910c29e1632 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4040,12 +4040,11 @@ static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
 }
 
 static int __io_setxattr_prep(struct io_kiocb *req,
-			const struct io_uring_sqe *sqe,
-			struct user_namespace *user_ns)
+			const struct io_uring_sqe *sqe)
 {
 	struct io_xattr *ix = &req->xattr;
 	const char __user *name;
-	void *ret;
+	int ret;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
@@ -4065,13 +4064,12 @@ static int __io_setxattr_prep(struct io_kiocb *req,
 		return -ENOMEM;
 	ix->ctx.kname_sz = XATTR_NAME_MAX + 1;
 
-	ret = setxattr_setup(user_ns, name, &ix->ctx);
-	if (IS_ERR(ret)) {
+	ret = setxattr_copy(name, &ix->ctx, &ix->value);
+	if (ret) {
 		kfree(ix->ctx.kname);
-		return PTR_ERR(ret);
+		return ret;
 	}
 
-	ix->value = ret;
 	req->flags |= REQ_F_NEED_CLEANUP;
 	return 0;
 }
@@ -4083,7 +4081,7 @@ static int io_setxattr_prep(struct io_kiocb *req,
 	const char __user *path;
 	int ret;
 
-	ret = __io_setxattr_prep(req, sqe, current_user_ns());
+	ret = __io_setxattr_prep(req, sqe);
 	if (ret)
 		return ret;
 
@@ -4101,7 +4099,7 @@ static int io_setxattr_prep(struct io_kiocb *req,
 static int io_fsetxattr_prep(struct io_kiocb *req,
 			const struct io_uring_sqe *sqe)
 {
-	return __io_setxattr_prep(req, sqe, file_mnt_user_ns(req->file));
+	return __io_setxattr_prep(req, sqe);
 }
 
 static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
@@ -4112,9 +4110,8 @@ static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
 
 	ret = mnt_want_write(path->mnt);
 	if (!ret) {
-		ret = vfs_setxattr(mnt_user_ns(path->mnt), path->dentry,
-				ix->ctx.kname, ix->value, ix->ctx.size,
-				ix->ctx.flags);
+		ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry,
+				  &ix->ctx, ix->value);
 		mnt_drop_write(path->mnt);
 	}
 
diff --git a/fs/xattr.c b/fs/xattr.c
index a675c7f0ea0c..03a44c5895d1 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -542,40 +542,59 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
  * Extended attribute SET operations
  */
 
-void *setxattr_setup(struct user_namespace *mnt_userns, const char __user *name,
-		struct xattr_ctx *ctx)
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx,
+		  void **xattr_val)
 {
 	void *kvalue = NULL;
 	int error;
 
 	if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	error = strncpy_from_user(ctx->kname, name, ctx->kname_sz);
 	if (error == 0 || error == ctx->kname_sz)
-		return  ERR_PTR(-ERANGE);
+		return  -ERANGE;
 	if (error < 0)
-		return ERR_PTR(error);
+		return error;
 
 	if (ctx->size) {
 		if (ctx->size > XATTR_SIZE_MAX)
-			return ERR_PTR(-E2BIG);
+			return -E2BIG;
 
 		kvalue = kvmalloc(ctx->size, GFP_KERNEL);
 		if (!kvalue)
-			return ERR_PTR(-ENOMEM);
+			return -ENOMEM;
 
 		if (copy_from_user(kvalue, ctx->value, ctx->size)) {
 			kvfree(kvalue);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
-
-		if ((strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
-		    (strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
-			posix_acl_fix_xattr_from_user(mnt_userns, kvalue, ctx->size);
 	}
 
-	return kvalue;
+	*xattr_val = kvalue;
+	return 0;
+}
+
+static void setxattr_convert(struct user_namespace *mnt_userns,
+			     struct xattr_ctx *ctx, void *kvalue)
+{
+	if (ctx->size &&
+	    ((strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+	     (strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
+		posix_acl_fix_xattr_from_user(mnt_userns, kvalue, ctx->size);
+}
+
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+		struct xattr_ctx *ctx, void *xattr_val)
+{
+	void *kvalue = NULL;
+	int error;
+
+	setxattr_convert(mnt_userns, ctx, kvalue);
+	error = vfs_setxattr(mnt_userns, dentry, ctx->kname,
+			     kvalue, ctx->size, ctx->flags);
+	kvfree(kvalue);
+	return error;
 }
 
 static long
@@ -591,14 +610,14 @@ setxattr(struct user_namespace *mnt_userns, struct dentry *d,
 		.kname_sz = sizeof(kname),
 		.flags    = flags,
 	};
-	void *kvalue;
+	void *kvalue = NULL;
 	int error;
 
-	kvalue = setxattr_setup(mnt_userns, name, &ctx);
-	if (IS_ERR(kvalue))
-		return PTR_ERR(kvalue);
+	error = setxattr_copy(name, &ctx, &kvalue);
+	if (error)
+		return error;
 
-	error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
+	error = do_setxattr(mnt_userns, d, &ctx, kvalue);
 
 	kvfree(kvalue);
 	return error;
Stefan Roesch Dec. 23, 2021, 8 p.m. UTC | #2
On 12/23/21 6:52 AM, Christian Brauner wrote:
> On Wed, Dec 22, 2021 at 01:01:26PM -0800, Stefan Roesch wrote:
>> This adds support to io_uring for the fsetxattr and setxattr API.
>>
>> Signed-off-by: Stefan Roesch <shr@fb.com>
>> ---
>>  fs/io_uring.c                 | 170 ++++++++++++++++++++++++++++++++++
>>  include/uapi/linux/io_uring.h |   6 +-
>>  2 files changed, 175 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index c8258c784116..8b6c70d6cacc 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -82,6 +82,7 @@
>>  #include <linux/audit.h>
>>  #include <linux/security.h>
>>  #include <linux/atomic-ref.h>
>> +#include <linux/xattr.h>
>>  
>>  #define CREATE_TRACE_POINTS
>>  #include <trace/events/io_uring.h>
>> @@ -726,6 +727,13 @@ struct io_async_rw {
>>  	struct wait_page_queue		wpq;
>>  };
>>  
>> +struct io_xattr {
>> +	struct file			*file;
>> +	struct xattr_ctx		ctx;
>> +	void				*value;
>> +	struct filename			*filename;
>> +};
>> +
>>  enum {
>>  	REQ_F_FIXED_FILE_BIT	= IOSQE_FIXED_FILE_BIT,
>>  	REQ_F_IO_DRAIN_BIT	= IOSQE_IO_DRAIN_BIT,
>> @@ -866,6 +874,7 @@ struct io_kiocb {
>>  		struct io_symlink	symlink;
>>  		struct io_hardlink	hardlink;
>>  		struct io_getdents	getdents;
>> +		struct io_xattr		xattr;
>>  	};
>>  
>>  	u8				opcode;
>> @@ -1118,6 +1127,10 @@ static const struct io_op_def io_op_defs[] = {
>>  	[IORING_OP_GETDENTS] = {
>>  		.needs_file		= 1,
>>  	},
>> +	[IORING_OP_FSETXATTR] = {
>> +		.needs_file = 1
>> +	},
>> +	[IORING_OP_SETXATTR] = {},
>>  };
>>  
>>  /* requests with any of those set should undergo io_disarm_next() */
>> @@ -3887,6 +3900,144 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
>>  	return 0;
>>  }
>>  
>> +static int __io_setxattr_prep(struct io_kiocb *req,
>> +			const struct io_uring_sqe *sqe,
>> +			struct user_namespace *user_ns)
>> +{
>> +	struct io_xattr *ix = &req->xattr;
>> +	const char __user *name;
>> +	void *ret;
>> +
>> +	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
>> +		return -EINVAL;
>> +	if (unlikely(sqe->ioprio))
>> +		return -EINVAL;
>> +	if (unlikely(req->flags & REQ_F_FIXED_FILE))
>> +		return -EBADF;
>> +
>> +	ix->filename = NULL;
>> +	name = u64_to_user_ptr(READ_ONCE(sqe->addr));
>> +	ix->ctx.value = u64_to_user_ptr(READ_ONCE(sqe->addr2));
>> +	ix->ctx.size = READ_ONCE(sqe->len);
>> +	ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
>> +
>> +	ix->ctx.kname = kmalloc(XATTR_NAME_MAX + 1, GFP_KERNEL);
>> +	if (!ix->ctx.kname)
>> +		return -ENOMEM;
>> +	ix->ctx.kname_sz = XATTR_NAME_MAX + 1;
>> +
>> +	ret = setxattr_setup(user_ns, name, &ix->ctx);
> 
> Looking at this a bit closer, the setxattr_setup() function converts the
> vfs caps prior to vfs_setxattr(). That shouldn't be done there though.
> The conversion should be done when mnt_want_write() is held in
> __io_setxattr() exactly how we do for setxattr()-based calls in
> fs/xattr.c. This will guard against changes of relevant mount properties
> (current or future). It will also allow you to simplify your
> setxattr_setup() function a bit and you don't need to retrieve the
> mount's idmapping until __io_setxattr().
> 
> Right now you're splitting updating the xattrs over the prep and commit
> stage and I worry that in fully async contexts this is easy to miss. So
> I'd rather do it in one place. Since we can't move it all into
> vfs_setxattr() similar to what we did for fscaps because it's used in a
> bunch of contexts where the conversion isn't wanted we should simply
> expose do_setxattr() similar to do_getxattr() you're adding.
> 
> So on top of your current patchset I'd suggest you do something like the
> following (completely untested):
> 


Thanks for your review and the code. I only changed the below code that the
do_setxattr does not use a kvalue, I assume you wanted to use xattr_val.


> From 6bcd3efc3293bb91599ee73272262ac596ab4608 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <christian.brauner@ubuntu.com>
> Date: Thu, 23 Dec 2021 15:23:14 +0100
> Subject: [PATCH] UNTESTED
> 
> ---
>  fs/internal.h |  8 +++++---
>  fs/io_uring.c | 21 +++++++++-----------
>  fs/xattr.c    | 55 ++++++++++++++++++++++++++++++++++-----------------
>  3 files changed, 51 insertions(+), 33 deletions(-)
> 
> diff --git a/fs/internal.h b/fs/internal.h
> index ea0433799dbc..08259fa98b2e 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -222,6 +222,8 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns,
>  		    void __user *value,
>  		    size_t size);
>  
> -void *setxattr_setup(struct user_namespace *mnt_userns,
> -		     const char __user *name,
> -		     struct xattr_ctx *ctx);
> +int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
> +		struct xattr_ctx *ctx, void *xattr_val);
> +
> +int setxattr_copy(const char __user *name, struct xattr_ctx *ctx,
> +		  void **xattr_val);
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 5dd01f19d915..c910c29e1632 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -4040,12 +4040,11 @@ static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
>  }
>  
>  static int __io_setxattr_prep(struct io_kiocb *req,
> -			const struct io_uring_sqe *sqe,
> -			struct user_namespace *user_ns)
> +			const struct io_uring_sqe *sqe)
>  {
>  	struct io_xattr *ix = &req->xattr;
>  	const char __user *name;
> -	void *ret;
> +	int ret;
>  
>  	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
>  		return -EINVAL;
> @@ -4065,13 +4064,12 @@ static int __io_setxattr_prep(struct io_kiocb *req,
>  		return -ENOMEM;
>  	ix->ctx.kname_sz = XATTR_NAME_MAX + 1;
>  
> -	ret = setxattr_setup(user_ns, name, &ix->ctx);
> -	if (IS_ERR(ret)) {
> +	ret = setxattr_copy(name, &ix->ctx, &ix->value);
> +	if (ret) {
>  		kfree(ix->ctx.kname);
> -		return PTR_ERR(ret);
> +		return ret;
>  	}
>  
> -	ix->value = ret;
>  	req->flags |= REQ_F_NEED_CLEANUP;
>  	return 0;
>  }
> @@ -4083,7 +4081,7 @@ static int io_setxattr_prep(struct io_kiocb *req,
>  	const char __user *path;
>  	int ret;
>  
> -	ret = __io_setxattr_prep(req, sqe, current_user_ns());
> +	ret = __io_setxattr_prep(req, sqe);
>  	if (ret)
>  		return ret;
>  
> @@ -4101,7 +4099,7 @@ static int io_setxattr_prep(struct io_kiocb *req,
>  static int io_fsetxattr_prep(struct io_kiocb *req,
>  			const struct io_uring_sqe *sqe)
>  {
> -	return __io_setxattr_prep(req, sqe, file_mnt_user_ns(req->file));
> +	return __io_setxattr_prep(req, sqe);
>  }
>  
>  static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
> @@ -4112,9 +4110,8 @@ static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
>  
>  	ret = mnt_want_write(path->mnt);
>  	if (!ret) {
> -		ret = vfs_setxattr(mnt_user_ns(path->mnt), path->dentry,
> -				ix->ctx.kname, ix->value, ix->ctx.size,
> -				ix->ctx.flags);
> +		ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry,
> +				  &ix->ctx, ix->value);
>  		mnt_drop_write(path->mnt);
>  	}
>  
> diff --git a/fs/xattr.c b/fs/xattr.c
> index a675c7f0ea0c..03a44c5895d1 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -542,40 +542,59 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
>   * Extended attribute SET operations
>   */
>  
> -void *setxattr_setup(struct user_namespace *mnt_userns, const char __user *name,
> -		struct xattr_ctx *ctx)
> +int setxattr_copy(const char __user *name, struct xattr_ctx *ctx,
> +		  void **xattr_val)
>  {
>  	void *kvalue = NULL;
>  	int error;
>  
>  	if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
> -		return ERR_PTR(-EINVAL);
> +		return -EINVAL;
>  
>  	error = strncpy_from_user(ctx->kname, name, ctx->kname_sz);
>  	if (error == 0 || error == ctx->kname_sz)
> -		return  ERR_PTR(-ERANGE);
> +		return  -ERANGE;
>  	if (error < 0)
> -		return ERR_PTR(error);
> +		return error;
>  
>  	if (ctx->size) {
>  		if (ctx->size > XATTR_SIZE_MAX)
> -			return ERR_PTR(-E2BIG);
> +			return -E2BIG;
>  
>  		kvalue = kvmalloc(ctx->size, GFP_KERNEL);
>  		if (!kvalue)
> -			return ERR_PTR(-ENOMEM);
> +			return -ENOMEM;
>  
>  		if (copy_from_user(kvalue, ctx->value, ctx->size)) {
>  			kvfree(kvalue);
> -			return ERR_PTR(-EFAULT);
> +			return -EFAULT;
>  		}
> -
> -		if ((strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
> -		    (strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
> -			posix_acl_fix_xattr_from_user(mnt_userns, kvalue, ctx->size);
>  	}
>  
> -	return kvalue;
> +	*xattr_val = kvalue;
> +	return 0;
> +}
> +
> +static void setxattr_convert(struct user_namespace *mnt_userns,
> +			     struct xattr_ctx *ctx, void *kvalue)
> +{
> +	if (ctx->size &&
> +	    ((strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
> +	     (strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
> +		posix_acl_fix_xattr_from_user(mnt_userns, kvalue, ctx->size);
> +}
> +
> +int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
> +		struct xattr_ctx *ctx, void *xattr_val)
> +{
> +	void *kvalue = NULL;
> +	int error;
> +
> +	setxattr_convert(mnt_userns, ctx, kvalue);
> +	error = vfs_setxattr(mnt_userns, dentry, ctx->kname,
> +			     kvalue, ctx->size, ctx->flags);
> +	kvfree(kvalue);
> +	return error;
>  }
>  
>  static long
> @@ -591,14 +610,14 @@ setxattr(struct user_namespace *mnt_userns, struct dentry *d,
>  		.kname_sz = sizeof(kname),
>  		.flags    = flags,
>  	};
> -	void *kvalue;
> +	void *kvalue = NULL;
>  	int error;
>  
> -	kvalue = setxattr_setup(mnt_userns, name, &ctx);
> -	if (IS_ERR(kvalue))
> -		return PTR_ERR(kvalue);
> +	error = setxattr_copy(name, &ctx, &kvalue);
> +	if (error)
> +		return error;
>  
> -	error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
> +	error = do_setxattr(mnt_userns, d, &ctx, kvalue);
>  
>  	kvfree(kvalue);
>  	return error;
>
diff mbox series

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c8258c784116..8b6c70d6cacc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -82,6 +82,7 @@ 
 #include <linux/audit.h>
 #include <linux/security.h>
 #include <linux/atomic-ref.h>
+#include <linux/xattr.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -726,6 +727,13 @@  struct io_async_rw {
 	struct wait_page_queue		wpq;
 };
 
+struct io_xattr {
+	struct file			*file;
+	struct xattr_ctx		ctx;
+	void				*value;
+	struct filename			*filename;
+};
+
 enum {
 	REQ_F_FIXED_FILE_BIT	= IOSQE_FIXED_FILE_BIT,
 	REQ_F_IO_DRAIN_BIT	= IOSQE_IO_DRAIN_BIT,
@@ -866,6 +874,7 @@  struct io_kiocb {
 		struct io_symlink	symlink;
 		struct io_hardlink	hardlink;
 		struct io_getdents	getdents;
+		struct io_xattr		xattr;
 	};
 
 	u8				opcode;
@@ -1118,6 +1127,10 @@  static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_GETDENTS] = {
 		.needs_file		= 1,
 	},
+	[IORING_OP_FSETXATTR] = {
+		.needs_file = 1
+	},
+	[IORING_OP_SETXATTR] = {},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -3887,6 +3900,144 @@  static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+static int __io_setxattr_prep(struct io_kiocb *req,
+			const struct io_uring_sqe *sqe,
+			struct user_namespace *user_ns)
+{
+	struct io_xattr *ix = &req->xattr;
+	const char __user *name;
+	void *ret;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (unlikely(sqe->ioprio))
+		return -EINVAL;
+	if (unlikely(req->flags & REQ_F_FIXED_FILE))
+		return -EBADF;
+
+	ix->filename = NULL;
+	name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	ix->ctx.value = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+	ix->ctx.size = READ_ONCE(sqe->len);
+	ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+	ix->ctx.kname = kmalloc(XATTR_NAME_MAX + 1, GFP_KERNEL);
+	if (!ix->ctx.kname)
+		return -ENOMEM;
+	ix->ctx.kname_sz = XATTR_NAME_MAX + 1;
+
+	ret = setxattr_setup(user_ns, name, &ix->ctx);
+	if (IS_ERR(ret)) {
+		kfree(ix->ctx.kname);
+		return PTR_ERR(ret);
+	}
+
+	ix->value = ret;
+	req->flags |= REQ_F_NEED_CLEANUP;
+	return 0;
+}
+
+static int io_setxattr_prep(struct io_kiocb *req,
+			const struct io_uring_sqe *sqe)
+{
+	struct io_xattr *ix = &req->xattr;
+	const char __user *path;
+	int ret;
+
+	ret = __io_setxattr_prep(req, sqe, current_user_ns());
+	if (ret)
+		return ret;
+
+	path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+	ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+	if (IS_ERR(ix->filename)) {
+		ret = PTR_ERR(ix->filename);
+		ix->filename = NULL;
+	}
+
+	return ret;
+}
+
+static int io_fsetxattr_prep(struct io_kiocb *req,
+			const struct io_uring_sqe *sqe)
+{
+	return __io_setxattr_prep(req, sqe, file_mnt_user_ns(req->file));
+}
+
+static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
+			struct path *path)
+{
+	struct io_xattr *ix = &req->xattr;
+	int ret;
+
+	ret = mnt_want_write(path->mnt);
+	if (!ret) {
+		ret = vfs_setxattr(mnt_user_ns(path->mnt), path->dentry,
+				ix->ctx.kname, ix->value, ix->ctx.size,
+				ix->ctx.flags);
+		mnt_drop_write(path->mnt);
+	}
+
+	return ret;
+}
+
+static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_xattr *ix = &req->xattr;
+	int ret;
+
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		return -EAGAIN;
+
+	ret = __io_setxattr(req, issue_flags, &req->file->f_path);
+
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	kfree(ix->ctx.kname);
+
+	if (ix->value)
+		kvfree(ix->value);
+	if (ret < 0)
+		req_set_fail(req);
+
+	io_req_complete(req, ret);
+	return 0;
+}
+
+static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_xattr *ix = &req->xattr;
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
+	struct path path;
+	int ret;
+
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		return -EAGAIN;
+
+retry:
+	ret = do_user_path_at_empty(AT_FDCWD, ix->filename, lookup_flags, &path);
+	if (!ret) {
+		ret = __io_setxattr(req, issue_flags, &path);
+		path_put(&path);
+		if (retry_estale(ret, lookup_flags)) {
+			lookup_flags |= LOOKUP_REVAL;
+			goto retry;
+		}
+	}
+	putname(ix->filename);
+
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	kfree(ix->ctx.kname);
+
+	if (ix->value)
+		kvfree(ix->value);
+	if (ret < 0)
+		req_set_fail(req);
+
+	io_req_complete(req, ret);
+	return 0;
+}
+
 static int io_unlinkat_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
@@ -6623,6 +6774,10 @@  static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_linkat_prep(req, sqe);
 	case IORING_OP_GETDENTS:
 		return io_getdents_prep(req, sqe);
+	case IORING_OP_FSETXATTR:
+		return io_fsetxattr_prep(req, sqe);
+	case IORING_OP_SETXATTR:
+		return io_setxattr_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6764,6 +6919,14 @@  static void io_clean_op(struct io_kiocb *req)
 			putname(req->hardlink.oldpath);
 			putname(req->hardlink.newpath);
 			break;
+		case IORING_OP_SETXATTR:
+			if (req->xattr.filename)
+				putname(req->xattr.filename);
+			fallthrough;
+		case IORING_OP_FSETXATTR:
+			kfree(req->xattr.ctx.kname);
+			kvfree(req->xattr.value);
+			break;
 		}
 	}
 	if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -6909,6 +7072,12 @@  static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_GETDENTS:
 		ret = io_getdents(req, issue_flags);
 		break;
+	case IORING_OP_FSETXATTR:
+		ret = io_fsetxattr(req, issue_flags);
+		break;
+	case IORING_OP_SETXATTR:
+		ret = io_setxattr(req, issue_flags);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -11277,6 +11446,7 @@  static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
 	BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
 	BUILD_BUG_SQE_ELEM(44, __u32,  file_index);
+	BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
 
 	BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
 		     sizeof(struct io_uring_rsrc_update));
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 57dc88db5793..c62a8bec8cd4 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -45,6 +45,7 @@  struct io_uring_sqe {
 		__u32		rename_flags;
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
+		__u32		xattr_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -60,7 +61,8 @@  struct io_uring_sqe {
 		__s32	splice_fd_in;
 		__u32	file_index;
 	};
-	__u64	__pad2[2];
+	__u64	addr3;
+	__u64	__pad2[1];
 };
 
 enum {
@@ -144,6 +146,8 @@  enum {
 	IORING_OP_SYMLINKAT,
 	IORING_OP_LINKAT,
 	IORING_OP_GETDENTS,
+	IORING_OP_FSETXATTR,
+	IORING_OP_SETXATTR,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,