diff mbox series

[v2] btrfs: add io_uring interface for encoded writes

Message ID 20250110172427.1834686-1-maharmstone@fb.com (mailing list archive)
State New
Headers show
Series [v2] btrfs: add io_uring interface for encoded writes | expand

Commit Message

Mark Harmstone Jan. 10, 2025, 5:23 p.m. UTC
Add an io_uring interface for encoded writes, with the same parameters
as the BTRFS_IOC_ENCODED_WRITE ioctl.

As with the encoded reads code, there's a test program for this at
https://github.com/maharmstone/io_uring-encoded, and I'll get this
worked into an fstest.

How io_uring works is that it initially calls btrfs_uring_cmd with the
IO_URING_F_NONBLOCK flag set, and if we return -EAGAIN it tries again in
a kthread with the flag cleared.

Ideally we'd honour this and call try_lock etc., but there's still a lot
of work to be done to create non-blocking versions of all the functions
in our write path. Instead, just validate the input in
btrfs_uring_encoded_write() on the first pass and return -EAGAIN, with a
view to properly optimizing the happy path later on.

Signed-off-by: Mark Harmstone <maharmstone@fb.com>
---
Changelog:
* Version 2: switched to using io_uring_cmd_get_async_data, so that we
only copy from userspace once

 fs/btrfs/ioctl.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

Comments

David Sterba Jan. 13, 2025, 8:09 p.m. UTC | #1
On Fri, Jan 10, 2025 at 05:23:52PM +0000, Mark Harmstone wrote:
> Add an io_uring interface for encoded writes, with the same parameters
> as the BTRFS_IOC_ENCODED_WRITE ioctl.
> 
> As with the encoded reads code, there's a test program for this at
> https://github.com/maharmstone/io_uring-encoded, and I'll get this
> worked into an fstest.
> 
> How io_uring works is that it initially calls btrfs_uring_cmd with the
> IO_URING_F_NONBLOCK flag set, and if we return -EAGAIN it tries again in
> a kthread with the flag cleared.
> 
> Ideally we'd honour this and call try_lock etc., but there's still a lot
> of work to be done to create non-blocking versions of all the functions
> in our write path. Instead, just validate the input in
> btrfs_uring_encoded_write() on the first pass and return -EAGAIN, with a
> view to properly optimizing the happy path later on.
> 
> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
> ---
> Changelog:
> * Version 2: switched to using io_uring_cmd_get_async_data, so that we
> only copy from userspace once

I've added this to for-next now, it hasn't been in linux-next so far but
we have at least one week before the merge window. The io_uring+read is
about to be released and there's no reason to delay the write for one
whole dev cycle. Any fixups please send as separate patches. Thanks.
diff mbox series

Patch

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 72c1d44c5c1d..5a0e9199851b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4941,6 +4941,128 @@  static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
 	return ret;
 }
 
+static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+	loff_t pos;
+	struct kiocb kiocb;
+	struct file *file;
+	ssize_t ret;
+	void __user *sqe_addr;
+	struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		ret = -EPERM;
+		goto out_acct;
+	}
+
+	file = cmd->file;
+	sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
+
+	if (!(file->f_mode & FMODE_WRITE)) {
+		ret = -EBADF;
+		goto out_acct;
+	}
+
+	if (!data) {
+		data = kzalloc(sizeof(*data), GFP_NOFS);
+		if (!data) {
+			ret = -ENOMEM;
+			goto out_acct;
+		}
+
+		io_uring_cmd_get_async_data(cmd)->op_data = data;
+
+		if (issue_flags & IO_URING_F_COMPAT) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+			struct btrfs_ioctl_encoded_io_args_32 args32;
+
+			if (copy_from_user(&args32, sqe_addr, sizeof(args32))) {
+				ret = -EFAULT;
+				goto out_acct;
+			}
+			data->args.iov = compat_ptr(args32.iov);
+			data->args.iovcnt = args32.iovcnt;
+			data->args.offset = args32.offset;
+			data->args.flags = args32.flags;
+			data->args.len = args32.len;
+			data->args.unencoded_len = args32.unencoded_len;
+			data->args.unencoded_offset = args32.unencoded_offset;
+			data->args.compression = args32.compression;
+			data->args.encryption = args32.encryption;
+			memcpy(data->args.reserved, args32.reserved,
+			       sizeof(data->args.reserved));
+#else
+			ret = -ENOTTY;
+			goto out_acct;
+#endif
+		} else {
+			if (copy_from_user(&data->args, sqe_addr, sizeof(data->args))) {
+				ret = -EFAULT;
+				goto out_acct;
+			}
+		}
+
+		ret = -EINVAL;
+		if (data->args.flags != 0)
+			goto out_acct;
+		if (memchr_inv(data->args.reserved, 0, sizeof(data->args.reserved)))
+			goto out_acct;
+		if (data->args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
+		    data->args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
+			goto out_acct;
+		if (data->args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
+		    data->args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
+			goto out_acct;
+		if (data->args.unencoded_offset > data->args.unencoded_len)
+			goto out_acct;
+		if (data->args.len > data->args.unencoded_len - data->args.unencoded_offset)
+			goto out_acct;
+
+		data->iov = data->iovstack;
+		ret = import_iovec(ITER_SOURCE, data->args.iov, data->args.iovcnt,
+				   ARRAY_SIZE(data->iovstack), &data->iov,
+				   &data->iter);
+		if (ret < 0)
+			goto out_acct;
+
+		if (iov_iter_count(&data->iter) == 0) {
+			ret = 0;
+			goto out_iov;
+		}
+	}
+
+	if (issue_flags & IO_URING_F_NONBLOCK) {
+		ret = -EAGAIN;
+		goto out_acct;
+	}
+
+	pos = data->args.offset;
+	ret = rw_verify_area(WRITE, file, &pos, data->args.len);
+	if (ret < 0)
+		goto out_iov;
+
+	init_sync_kiocb(&kiocb, file);
+	ret = kiocb_set_rw_flags(&kiocb, 0, WRITE);
+	if (ret)
+		goto out_iov;
+	kiocb.ki_pos = pos;
+
+	file_start_write(file);
+
+	ret = btrfs_do_write_iter(&kiocb, &data->iter, &data->args);
+	if (ret > 0)
+		fsnotify_modify(file);
+
+	file_end_write(file);
+out_iov:
+	kfree(data->iov);
+out_acct:
+	if (ret > 0)
+		add_wchar(current, ret);
+	inc_syscw(current);
+	return ret;
+}
+
 int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
 	switch (cmd->cmd_op) {
@@ -4949,6 +5071,12 @@  int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	case BTRFS_IOC_ENCODED_READ_32:
 #endif
 		return btrfs_uring_encoded_read(cmd, issue_flags);
+
+	case BTRFS_IOC_ENCODED_WRITE:
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+	case BTRFS_IOC_ENCODED_WRITE_32:
+#endif
+		return btrfs_uring_encoded_write(cmd, issue_flags);
 	}
 
 	return -EINVAL;