diff mbox series

[V2,3/3] selftests: ublk: add ublk zero copy test

Message ID 20250226155841.2489284-4-ming.lei@redhat.com (mailing list archive)
State New
Headers show
Series selftests: add ublk selftests | expand

Commit Message

Ming Lei Feb. 26, 2025, 3:58 p.m. UTC
Add selftests for covering ublk zero copy feature.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 tools/testing/selftests/ublk/Makefile        |   2 +
 tools/testing/selftests/ublk/kublk.c         | 196 ++++++++++++++++---
 tools/testing/selftests/ublk/test_common.sh  |   8 +
 tools/testing/selftests/ublk/test_loop_03.sh |  32 +++
 tools/testing/selftests/ublk/test_loop_04.sh |  21 ++
 5 files changed, 237 insertions(+), 22 deletions(-)
 create mode 100755 tools/testing/selftests/ublk/test_loop_03.sh
 create mode 100755 tools/testing/selftests/ublk/test_loop_04.sh

Comments

Keith Busch Feb. 26, 2025, 5:41 p.m. UTC | #1
On Wed, Feb 26, 2025 at 11:58:38PM +0800, Ming Lei wrote:
> +	struct io_uring_sqe *reg;
> +	struct io_uring_sqe *rw;
> +	struct io_uring_sqe *ureg;
> +
> +	if (!zc) {
> +		rw = ublk_queue_alloc_sqe(q);
> +		if (!rw)
> +			return -ENOMEM;
> +
> +		io_uring_prep_rw(op, rw, 1 /*fds[1]*/,
> +				(void *)iod->addr,
> +				iod->nr_sectors << 9,
> +				iod->start_sector << 9);
> +		io_uring_sqe_set_flags(rw, IOSQE_FIXED_FILE);
> +		q->io_inflight++;
> +		/* bit63 marks us as tgt io */
> +		rw->user_data = build_user_data(tag, op, UBLK_IO_TGT_NORMAL, 1);
> +		return 0;
> +	}
> +
> +	ublk_queue_alloc_sqe3(q, &reg, &rw, &ureg);
> +
> +	io_uring_prep_buf_register(reg, 0, tag, q->q_id, tag);
> +	reg->user_data = build_user_data(tag, 0xfe, 1, 1);
> +	reg->flags |= IOSQE_CQE_SKIP_SUCCESS;
> +	reg->flags |= IOSQE_IO_LINK;
> +
> +	io_uring_prep_rw(op, rw, 1 /*fds[1]*/, 0,
> +		iod->nr_sectors << 9,
> +		iod->start_sector << 9);
> +	rw->buf_index = tag;
> +	rw->flags |= IOSQE_FIXED_FILE;
> +	rw->flags |= IOSQE_IO_LINK;
> +	rw->user_data = build_user_data(tag, op, UBLK_IO_TGT_ZC_OP, 1);
> +	q->io_inflight++;
> +
> +	io_uring_prep_buf_unregister(ureg, 0, tag, q->q_id, tag);
> +	ureg->user_data = build_user_data(tag, 0xff, UBLK_IO_TGT_ZC_BUF, 1);

You don't have anything handling the unregister command's completion so
I think you want the IOSQE_CQE_SKIP_SUCCESS flag on it otherwise you get
an unexpected CQE for it.
Ming Lei Feb. 27, 2025, 12:25 a.m. UTC | #2
On Wed, Feb 26, 2025 at 10:41:43AM -0700, Keith Busch wrote:
> On Wed, Feb 26, 2025 at 11:58:38PM +0800, Ming Lei wrote:
> > +	struct io_uring_sqe *reg;
> > +	struct io_uring_sqe *rw;
> > +	struct io_uring_sqe *ureg;
> > +
> > +	if (!zc) {
> > +		rw = ublk_queue_alloc_sqe(q);
> > +		if (!rw)
> > +			return -ENOMEM;
> > +
> > +		io_uring_prep_rw(op, rw, 1 /*fds[1]*/,
> > +				(void *)iod->addr,
> > +				iod->nr_sectors << 9,
> > +				iod->start_sector << 9);
> > +		io_uring_sqe_set_flags(rw, IOSQE_FIXED_FILE);
> > +		q->io_inflight++;
> > +		/* bit63 marks us as tgt io */
> > +		rw->user_data = build_user_data(tag, op, UBLK_IO_TGT_NORMAL, 1);
> > +		return 0;
> > +	}
> > +
> > +	ublk_queue_alloc_sqe3(q, &reg, &rw, &ureg);
> > +
> > +	io_uring_prep_buf_register(reg, 0, tag, q->q_id, tag);
> > +	reg->user_data = build_user_data(tag, 0xfe, 1, 1);
> > +	reg->flags |= IOSQE_CQE_SKIP_SUCCESS;
> > +	reg->flags |= IOSQE_IO_LINK;
> > +
> > +	io_uring_prep_rw(op, rw, 1 /*fds[1]*/, 0,
> > +		iod->nr_sectors << 9,
> > +		iod->start_sector << 9);
> > +	rw->buf_index = tag;
> > +	rw->flags |= IOSQE_FIXED_FILE;
> > +	rw->flags |= IOSQE_IO_LINK;
> > +	rw->user_data = build_user_data(tag, op, UBLK_IO_TGT_ZC_OP, 1);
> > +	q->io_inflight++;
> > +
> > +	io_uring_prep_buf_unregister(ureg, 0, tag, q->q_id, tag);
> > +	ureg->user_data = build_user_data(tag, 0xff, UBLK_IO_TGT_ZC_BUF, 1);
> 
> You don't have anything handling the unregister command's completion so
> I think you want the IOSQE_CQE_SKIP_SUCCESS flag on it otherwise you get
> an unexpected CQE for it.

Please see ublk_loop_io_done() which does handle unregister command by
the flag of UBLK_IO_TGT_ZC_BUF. And it is reasonable to complete the
io command after the whole link is done.

BTW, with this way, one ublk bug is actually triggered, and Caleb's patch
fixes it.


Thanks,
Ming
diff mbox series

Patch

diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 270d2bbe232b..c235f4ec8b15 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -6,6 +6,8 @@  LDLIBS += -lpthread -lm -luring
 TEST_PROGS := test_null_01.sh
 TEST_PROGS += test_loop_01.sh
 TEST_PROGS += test_loop_02.sh
+TEST_PROGS += test_loop_03.sh
+TEST_PROGS += test_loop_04.sh
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS_EXTENDED = kublk
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index b876752f0e11..528835ec610c 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -42,6 +42,10 @@ 
 #define UBLK_MAX_QUEUES                 4
 #define UBLK_QUEUE_DEPTH                128
 
+#define UBLK_IO_TGT_NORMAL       0
+#define UBLK_IO_TGT_ZC_BUF       1
+#define UBLK_IO_TGT_ZC_OP        2
+
 #define UBLK_DBG_DEV            (1U << 0)
 #define UBLK_DBG_QUEUE          (1U << 1)
 #define UBLK_DBG_IO_CMD         (1U << 2)
@@ -124,6 +128,7 @@  struct ublk_queue {
 #define UBLKSRV_QUEUE_STOPPING	(1U << 0)
 #define UBLKSRV_QUEUE_IDLE	(1U << 1)
 #define UBLKSRV_NO_BUF		(1U << 2)
+#define UBLKSRV_ZC		(1U << 3)
 	unsigned state;
 	pid_t tid;
 	pthread_t thread;
@@ -180,6 +185,11 @@  static inline unsigned int user_data_to_op(__u64 user_data)
 	return (user_data >> 16) & 0xff;
 }
 
+static inline unsigned int user_data_to_tgt_data(__u64 user_data)
+{
+	return (user_data >> 24) & 0xffff;
+}
+
 static void ublk_err(const char *fmt, ...)
 {
 	va_list ap;
@@ -221,6 +231,16 @@  static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
 	return (void *)&sqe->cmd;
 }
 
+static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
+{
+	q->ios[tag].result = res;
+}
+
+static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
+{
+	return q->ios[tag].result;
+}
+
 static inline void ublk_mark_io_done(struct ublk_io *io, int res)
 {
 	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
@@ -497,11 +517,18 @@  static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
 	return __ublk_queue_cmd_buf_sz(q->q_depth);
 }
 
+static int ublk_queue_use_zc(const struct ublk_queue *q)
+{
+	return q->state & UBLKSRV_ZC;
+}
+
 static void ublk_queue_deinit(struct ublk_queue *q)
 {
 	int i;
 	int nr_ios = q->q_depth;
 
+	io_uring_unregister_buffers(&q->ring);
+
 	io_uring_unregister_ring_fd(&q->ring);
 
 	if (q->ring.ring_fd > 0) {
@@ -532,6 +559,11 @@  static int ublk_queue_init(struct ublk_queue *q)
 	q->cmd_inflight = 0;
 	q->tid = gettid();
 
+	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+		q->state |= UBLKSRV_NO_BUF;
+		q->state |= UBLKSRV_ZC;
+	}
+
 	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
 	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
 	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
@@ -566,6 +598,15 @@  static int ublk_queue_init(struct ublk_queue *q)
 		goto fail;
 	}
 
+	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+		ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
+		if (ret) {
+			ublk_err("ublk dev %d queue %d register spare buffers failed %d",
+					dev->dev_info.dev_id, q->q_id, ret);
+			goto fail;
+		}
+	}
+
 	io_uring_register_ring_fd(&q->ring);
 
 	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
@@ -733,9 +774,10 @@  static void ublk_handle_cqe(struct io_uring *r,
 		ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
 				cqe->res, cqe->user_data, q->state);
 
-	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
+	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
 			__func__, cqe->res, q->q_id, tag, cmd_op,
 			is_target_io(cqe->user_data),
+			user_data_to_tgt_data(cqe->user_data),
 			(q->state & UBLKSRV_QUEUE_STOPPING));
 
 	/* Don't retrieve io in case of target io */
@@ -1245,6 +1287,52 @@  static int cmd_dev_help(char *exe)
 
 /****************** part 2: target implementation ********************/
 
+static inline void ublk_queue_alloc_sqe3(struct ublk_queue *q,
+		struct io_uring_sqe **sqe1, struct io_uring_sqe **sqe2,
+		struct io_uring_sqe **sqe3)
+{
+	struct io_uring *r = &q->ring;
+	unsigned left = io_uring_sq_space_left(r);
+
+	if (left < 3)
+		io_uring_submit(r);
+
+	*sqe1 = io_uring_get_sqe(r);
+	*sqe2 = io_uring_get_sqe(r);
+	*sqe3 = io_uring_get_sqe(r);
+}
+
+static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
+		int dev_fd, int tag, int q_id, __u64 index)
+{
+	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
+
+	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
+	sqe->opcode		= IORING_OP_URING_CMD;
+	sqe->flags		|= IOSQE_FIXED_FILE;
+	sqe->cmd_op		= UBLK_U_IO_REGISTER_IO_BUF;
+
+	cmd->tag		= tag;
+	cmd->addr		= index;
+	cmd->q_id		= q_id;
+}
+
+static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
+		int dev_fd, int tag, int q_id, __u64 index)
+{
+	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
+
+	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
+	sqe->opcode		= IORING_OP_URING_CMD;
+	sqe->flags		|= IOSQE_FIXED_FILE;
+	sqe->cmd_op		= UBLK_U_IO_UNREGISTER_IO_BUF;
+
+	cmd->tag		= tag;
+	cmd->addr		= index;
+	cmd->q_id		= q_id;
+}
+
+
 static int ublk_null_tgt_init(struct ublk_dev *dev)
 {
 	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
@@ -1326,48 +1414,95 @@  static int backing_file_tgt_init(struct ublk_dev *dev)
 	return 0;
 }
 
+static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc)
+{
+	unsigned ublk_op = ublksrv_get_op(iod);
+
+	if (ublk_op == UBLK_IO_OP_READ)
+		return zc ? IORING_OP_READ_FIXED: IORING_OP_READ;
+	else if (ublk_op == UBLK_IO_OP_WRITE)
+		return zc ? IORING_OP_WRITE_FIXED: IORING_OP_WRITE;
+	else
+		assert(0);
+}
+
+static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+{
+	int zc = ublk_queue_use_zc(q);
+	enum io_uring_op op = ublk_to_uring_op(iod, zc);
+	struct io_uring_sqe *reg;
+	struct io_uring_sqe *rw;
+	struct io_uring_sqe *ureg;
+
+	if (!zc) {
+		rw = ublk_queue_alloc_sqe(q);
+		if (!rw)
+			return -ENOMEM;
+
+		io_uring_prep_rw(op, rw, 1 /*fds[1]*/,
+				(void *)iod->addr,
+				iod->nr_sectors << 9,
+				iod->start_sector << 9);
+		io_uring_sqe_set_flags(rw, IOSQE_FIXED_FILE);
+		q->io_inflight++;
+		/* bit63 marks us as tgt io */
+		rw->user_data = build_user_data(tag, op, UBLK_IO_TGT_NORMAL, 1);
+		return 0;
+	}
+
+	ublk_queue_alloc_sqe3(q, &reg, &rw, &ureg);
+
+	io_uring_prep_buf_register(reg, 0, tag, q->q_id, tag);
+	reg->user_data = build_user_data(tag, 0xfe, 1, 1);
+	reg->flags |= IOSQE_CQE_SKIP_SUCCESS;
+	reg->flags |= IOSQE_IO_LINK;
+
+	io_uring_prep_rw(op, rw, 1 /*fds[1]*/, 0,
+		iod->nr_sectors << 9,
+		iod->start_sector << 9);
+	rw->buf_index = tag;
+	rw->flags |= IOSQE_FIXED_FILE;
+	rw->flags |= IOSQE_IO_LINK;
+	rw->user_data = build_user_data(tag, op, UBLK_IO_TGT_ZC_OP, 1);
+	q->io_inflight++;
+
+	io_uring_prep_buf_unregister(ureg, 0, tag, q->q_id, tag);
+	ureg->user_data = build_user_data(tag, 0xff, UBLK_IO_TGT_ZC_BUF, 1);
+	q->io_inflight++;
+
+	return 0;
+}
+
 static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
 {
 	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
-	struct io_uring_sqe *sqe = ublk_queue_alloc_sqe(q);
 	unsigned ublk_op = ublksrv_get_op(iod);
-
-	if (!sqe)
-		return -ENOMEM;
+	struct io_uring_sqe *sqe;
 
 	switch (ublk_op) {
 	case UBLK_IO_OP_FLUSH:
+		sqe = ublk_queue_alloc_sqe(q);
+		if (!sqe)
+			return -ENOMEM;
 		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
 				iod->nr_sectors << 9,
 				iod->start_sector << 9,
 				IORING_FSYNC_DATASYNC);
 		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
+		q->io_inflight++;
+		sqe->user_data = build_user_data(tag, ublk_op, UBLK_IO_TGT_NORMAL, 1);
 		break;
 	case UBLK_IO_OP_WRITE_ZEROES:
 	case UBLK_IO_OP_DISCARD:
 		return -ENOTSUP;
 	case UBLK_IO_OP_READ:
-		io_uring_prep_read(sqe, 1 /*fds[1]*/,
-				(void *)iod->addr,
-				iod->nr_sectors << 9,
-				iod->start_sector << 9);
-		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
-		break;
 	case UBLK_IO_OP_WRITE:
-		io_uring_prep_write(sqe, 1 /*fds[1]*/,
-				(void *)iod->addr,
-				iod->nr_sectors << 9,
-				iod->start_sector << 9);
-		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
+		loop_queue_tgt_rw_io(q, iod, tag);
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	q->io_inflight++;
-	/* bit63 marks us as tgt io */
-	sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
-
 	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
 			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
 	return 1;
@@ -1387,9 +1522,22 @@  static void ublk_loop_io_done(struct ublk_queue *q, int tag,
 		const struct io_uring_cqe *cqe)
 {
 	int cqe_tag = user_data_to_tag(cqe->user_data);
+	unsigned tgt_data = user_data_to_tgt_data(cqe->user_data);
+	int res = cqe->res;
+
+	if (res < 0 || tgt_data == UBLK_IO_TGT_NORMAL)
+		goto complete;
 
+	if (tgt_data == UBLK_IO_TGT_ZC_OP) {
+		ublk_set_io_res(q, tag, cqe->res);
+		goto exit;
+	}
+	assert(tgt_data == UBLK_IO_TGT_ZC_BUF);
+	res = ublk_get_io_res(q, tag);
+complete:
 	assert(tag == cqe_tag);
-	ublk_complete_io(q, tag, cqe->res);
+	ublk_complete_io(q, tag, res);
+exit:
 	q->io_inflight--;
 }
 
@@ -1460,6 +1608,7 @@  int main(int argc, char *argv[])
 		{ "depth",		1,	NULL, 'd' },
 		{ "debug_mask",		1,	NULL,  0  },
 		{ "quiet",		0,	NULL,  0  },
+		{ "zero_copy",          1,      NULL, 'z' },
 		{ 0, 0, 0, 0 }
 	};
 	int option_idx, opt;
@@ -1476,7 +1625,7 @@  int main(int argc, char *argv[])
 		return ret;
 
 	optind = 2;
-	while ((opt = getopt_long(argc, argv, "t:n:d:q:a",
+	while ((opt = getopt_long(argc, argv, "t:n:d:q:a:z",
 				  longopts, &option_idx)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -1495,6 +1644,9 @@  int main(int argc, char *argv[])
 		case 'd':
 			ctx.queue_depth = strtol(optarg, NULL, 10);
 			break;
+		case 'z':
+			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
+			break;
 		case 0:
 			if (!strcmp(longopts[option_idx].name, "debug_mask"))
 				ublk_dbg_mask = strtol(optarg, NULL, 16);
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index 2b894c7a8e2e..711538a2cbf5 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -101,4 +101,12 @@  _add_ublk_dev() {
 	echo ${dev_id}
 }
 
+_have_feature()
+{
+	if  $UBLK_PROG "features" | grep $1 > /dev/null 2>&1; then
+		return 0
+	fi
+	return 1
+}
+
 export UBLK_PROG=$(pwd)/kublk
diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh
new file mode 100755
index 000000000000..3aa98fe5137b
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_03.sh
@@ -0,0 +1,32 @@ 
+#!/bin/bash
+
+. test_common.sh
+
+TID="loop_03"
+ERR_CODE=0
+
+_have_feature "ZERO_COPY" || exit 4
+
+_prep_test "loop" "write and verify over zero copy"
+
+backfile_0=`_create_backfile 256M`
+
+dev_id=`_add_ublk_dev -t loop $backfile_0 -z`
+
+# run fio over the ublk disk
+fio --name=write_and_verify \
+    --filename=/dev/ublkb${dev_id} \
+    --ioengine=libaio --iodepth=64 \
+    --rw=write \
+    --size=256M \
+    --direct=1 \
+    --verify=crc32c \
+    --do_verify=1 \
+    --bs=4k > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test ${dev_id} "loop"
+
+_remove_backfile $backfile_0
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh
new file mode 100755
index 000000000000..86c0c86c93a2
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_04.sh
@@ -0,0 +1,21 @@ 
+#!/bin/bash
+
+. test_common.sh
+
+TID="loop_04"
+ERR_CODE=0
+
+_prep_test "loop" "mkfs & mount & umount with zero copy"
+
+backfile_0=`_create_backfile 256M`
+
+dev_id=`_add_ublk_dev -t loop -z $backfile_0`
+
+_mkfs_mount_test /dev/ublkb${dev_id}
+ERR_CODE=$?
+
+_cleanup_test ${dev_id} "loop"
+
+_remove_backfile $backfile_0
+
+_show_result $TID $ERR_CODE