Message ID | 20230220034649.1522978-2-ming.lei@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | blktests: add mini ublk source and blktests/033 | expand |
On 2/19/2023 7:46 PM, Ming Lei wrote: > Prepare for adding ublk related test: > > 1) ublk delete is sync removal, this way is convenient to > blkg/queue/disk instance leak issue > > 2) mini ublk has two builtin target(null, loop), and loop IO is > handled by io_uring, so we can use ublk to cover part of io_uring > workloads > > 3) not like loop/nbd, ublk won't pre-allocate/add disk, and always > add/delete disk dynamically, this way may cover disk plug & unplug > tests > > 4) ublk specific test given people starts to use it, so better to > let blktest cover ublk related tests > > Add mini ublk source for test purpose only, which is easy to use: > > ./miniublk add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] > default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation) > -t loop -f backing_file > -t null > ./miniublk del [-n dev_id] -a > -a delete all devices, -n delete specified device > ./miniublk list [-n dev_id] -a > -a list all devices, -n list specified device, default -a > > miniublk depends on liburing 2.2, adds HAVE_LIBURING for checking if > liburing 2.2 exists; also add HAVE_UBLK_HEADER for checking ublk kernel > UAPI header exists. If either of two dependencies can't be met, simply > ignore miniublk target. > > Also v6.0 is the 1st linux kernel release with ublk. 
> > Signed-off-by: Ming Lei <ming.lei@redhat.com> > --- > src/.gitignore | 1 + > src/Makefile | 18 + > src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 1395 insertions(+) > create mode 100644 src/miniublk.c > > diff --git a/src/.gitignore b/src/.gitignore > index 355bed3..df7aff5 100644 > --- a/src/.gitignore > +++ b/src/.gitignore > @@ -8,3 +8,4 @@ > /sg/dxfer-from-dev > /sg/syzkaller1 > /zbdioctl > +/miniublk > diff --git a/src/Makefile b/src/Makefile > index 3b587f6..81c6541 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ > else echo "$(3)"; fi) > > +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ > + $(CC) -E - 2>&1 /dev/null | grep $(2) > /dev/null 2>&1; \ > + then echo 1;else echo 0; fi) > + > C_TARGETS := \ > loblksize \ > loop_change_fd \ > @@ -13,16 +17,27 @@ C_TARGETS := \ > sg/syzkaller1 \ > zbdioctl > > +C_MINIUBLK := miniublk > + > +HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD) > +HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1) > + > CXX_TARGETS := \ > discontiguous-io > > +ifeq ($(HAVE_LIBURING)$(HAVE_UBLK_HEADER), 11) > +TARGETS := $(C_TARGETS) $(CXX_TARGETS) $(C_MINIUBLK) > +else > +$(info Skip $(C_MINIUBLK) build due to missing kernel header(v6.0+) or liburing(2.2+)) > TARGETS := $(C_TARGETS) $(CXX_TARGETS) > +endif > > CONFIG_DEFS := $(call HAVE_C_HEADER,linux/blkzoned.h,-DHAVE_LINUX_BLKZONED_H) > > override CFLAGS := -O2 -Wall -Wshadow $(CFLAGS) $(CONFIG_DEFS) > override CXXFLAGS := -O2 -std=c++11 -Wall -Wextra -Wshadow -Wno-sign-compare \ > -Werror $(CXXFLAGS) $(CONFIG_DEFS) > +MINIUBLK_FLAGS := -D_GNU_SOURCE -lpthread -luring > > all: $(TARGETS) > > @@ -39,4 +54,7 @@ $(C_TARGETS): %: %.c > $(CXX_TARGETS): %: %.cpp > $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ > > +$(C_MINIUBLK): %: miniublk.c > + $(CC) $(CFLAGS) $(MINIUBLK_FLAGS) -o $@ 
miniublk.c > + > .PHONY: all clean install > diff --git a/src/miniublk.c b/src/miniublk.c > new file mode 100644 > index 0000000..e84ba41 > --- /dev/null > +++ b/src/miniublk.c > @@ -0,0 +1,1376 @@ > +// SPDX-License-Identifier: GPL-3.0+ > +// Copyright (C) 2023 Ming Lei > + > +/* > + * io_uring based mini ublk implementation with null/loop target, > + * for test purpose only. > + * > + * So please keep it clean & simple & reliable. > + */ > + > +#include <unistd.h> > +#include <stdlib.h> > +#include <assert.h> > +#include <stdio.h> > +#include <stdarg.h> > +#include <string.h> > +#include <pthread.h> > +#include <getopt.h> > +#include <limits.h> > +#include <sys/syscall.h> > +#include <sys/mman.h> > +#include <sys/ioctl.h> > +#include <liburing.h> > +#include <linux/ublk_cmd.h> > + > +#define CTRL_DEV "/dev/ublk-control" > +#define UBLKC_DEV "/dev/ublkc" > +#define UBLK_CTRL_RING_DEPTH 32 > + > +/* queue idle timeout */ > +#define UBLKSRV_IO_IDLE_SECS 20 > + > +#define UBLK_IO_MAX_BYTES 65536 > +#define UBLK_MAX_QUEUES 4 > +#define UBLK_QUEUE_DEPTH 128 > + > +#define UBLK_DBG_DEV (1U << 0) > +#define UBLK_DBG_QUEUE (1U << 1) > +#define UBLK_DBG_IO_CMD (1U << 2) > +#define UBLK_DBG_IO (1U << 3) > +#define UBLK_DBG_CTRL_CMD (1U << 4) > +#define UBLK_LOG (1U << 5) > + > +struct ublk_dev; > +struct ublk_queue; > + > +struct ublk_ctrl_cmd_data { > + unsigned short cmd_op; perhaps use enum type to avoid any type mismatch errors in future.. > +#define CTRL_CMD_HAS_DATA 1 > +#define CTRL_CMD_HAS_BUF 2 > + unsigned short flags; I'd keep flags u64 or u32 just in case.. 
> + > + __u64 data[2]; > + __u64 addr; > + __u32 len; > +}; > + > +struct ublk_io { > + char *buf_addr; > + > +#define UBLKSRV_NEED_FETCH_RQ (1UL << 0) > +#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) > +#define UBLKSRV_IO_FREE (1UL << 2) > + unsigned int flags; > + > + unsigned int result; > +}; > + > +struct ublk_tgt_ops { > + const char *name; > + int (*init_tgt)(struct ublk_dev *); > + void (*deinit_tgt)(struct ublk_dev *); > + > + int (*queue_io)(struct ublk_queue *, int tag); > + void (*tgt_io_done)(struct ublk_queue *, > + int tag, const struct io_uring_cqe *); > +}; > + > +struct ublk_tgt { > + unsigned long dev_size; > + const struct ublk_tgt_ops *ops; > + int argc; > + char **argv; > + struct ublk_params params; > +}; > + > +struct ublk_queue { > + int q_id; > + int q_depth; is there a reason to not use unsigned int for qid and qd above ? > + unsigned int cmd_inflight; > + unsigned int io_inflight; > + struct ublk_dev *dev; > + const struct ublk_tgt_ops *tgt_ops; > + char *io_cmd_buf; > + struct io_uring ring; > + struct ublk_io ios[UBLK_QUEUE_DEPTH]; > +#define UBLKSRV_QUEUE_STOPPING (1U << 0) > +#define UBLKSRV_QUEUE_IDLE (1U << 1) > + unsigned state; > + int tid; instead of int pid_t ? 
> + pthread_t thread; > +}; > + > +struct ublk_dev { > + struct ublk_tgt tgt; > + struct ublksrv_ctrl_dev_info dev_info; > + struct ublk_queue q[UBLK_MAX_QUEUES]; > + > + int fds[2]; /* fds[0] points to /dev/ublkcN */ > + int nr_fds; > + int ctrl_fd; > + struct io_uring ring; > +}; > + > +#ifndef offsetof > +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) > +#endif > + > +#ifndef container_of > +#define container_of(ptr, type, member) ({ \ > + unsigned long __mptr = (unsigned long)(ptr); \ > + ((type *)(__mptr - offsetof(type, member))); }) > +#endif > + > +#define round_up(val, rnd) \ > + (((val) + ((rnd) - 1)) & ~((rnd) - 1)) > + > +#define ublk_assert(x) do { \ > + if (!(x)) { \ > + ublk_err("%s %d: assert!\n", __func__, __LINE__); \ > + assert(x); \ > + } \ > +} while (0) > + > +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name); > + > +static unsigned int ublk_dbg_mask = UBLK_LOG; > + > +static inline unsigned ilog2(unsigned x) > +{ > + return sizeof(unsigned) * 8 - 1 - __builtin_clz(x); > +} > + > +static inline int is_target_io(__u64 user_data) > +{ > + return (user_data & (1ULL << 63)) != 0; > +} > + > +static inline __u64 build_user_data(unsigned tag, unsigned op, > + unsigned tgt_data, unsigned is_target_io) > +{ > + assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16)); > + > + return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63; > +} > + > +static inline unsigned int user_data_to_tag(__u64 user_data) > +{ > + return user_data & 0xffff; > +} > + > +static inline unsigned int user_data_to_op(__u64 user_data) > +{ > + return (user_data >> 16) & 0xff; > +} > + > +static void ublk_err(const char *fmt, ...) > +{ > + va_list ap; > + > + va_start(ap, fmt); > + vfprintf(stderr, fmt, ap); > +} > + > +static void ublk_log(const char *fmt, ...) 
> +{ > + if (ublk_dbg_mask & UBLK_LOG) { > + va_list ap; > + > + va_start(ap, fmt); > + vfprintf(stdout, fmt, ap); > + } > +} > + > +static void ublk_dbg(int level, const char *fmt, ...) > +{ > + if (level & ublk_dbg_mask) { > + va_list ap; > + va_start(ap, fmt); > + vfprintf(stdout, fmt, ap); > + } > +} > + > +static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) > +{ > + return (void *)&sqe->addr3; > +} > + > +static inline void ublk_mark_io_done(struct ublk_io *io, int res) > +{ > + io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); > + io->result = res; > +} > + > +static inline const struct ublksrv_io_desc *ublk_get_iod( > + const struct ublk_queue *q, int tag) > +{ > + return (struct ublksrv_io_desc *) > + &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); > +} > + > +static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, > + __u32 cmd_op) > +{ > + __u32 *addr = (__u32 *)&sqe->off; > + > + addr[0] = cmd_op; > + addr[1] = 0; > +} > + > +static inline int ublk_setup_ring(struct io_uring *r, int depth, > + int cq_depth, unsigned flags) > +{ > + struct io_uring_params p; > + > + memset(&p, 0, sizeof(p)); > + p.flags = flags | IORING_SETUP_CQSIZE; > + p.cq_entries = cq_depth; > + > + return io_uring_queue_init_params(depth, r, &p); > +} > + > +static inline void ublk_ctrl_init_cmd(struct ublk_dev *dev, > + struct io_uring_sqe *sqe, > + struct ublk_ctrl_cmd_data *data) > +{ > + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > + struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe); > + > + sqe->fd = dev->ctrl_fd; > + sqe->opcode = IORING_OP_URING_CMD; > + sqe->ioprio = 0; > + > + if (data->flags & CTRL_CMD_HAS_BUF) { > + cmd->addr = data->addr; > + cmd->len = data->len; > + } > + > + if (data->flags & CTRL_CMD_HAS_DATA) > + cmd->data[0] = data->data[0]; > + > + cmd->dev_id = info->dev_id; > + cmd->queue_id = -1; > + > + ublk_set_sqe_cmd_op(sqe, data->cmd_op); > + > + io_uring_sqe_set_data(sqe, 
cmd); > +} > + > +static int __ublk_ctrl_cmd(struct ublk_dev *dev, > + struct ublk_ctrl_cmd_data *data) > +{ > + struct io_uring_sqe *sqe; > + struct io_uring_cqe *cqe; > + int ret = -EINVAL; > + > + sqe = io_uring_get_sqe(&dev->ring); > + if (!sqe) { > + ublk_err("%s: can't get sqe ret %d\n", __func__, ret); > + return ret; > + } > + > + ublk_ctrl_init_cmd(dev, sqe, data); > + > + ret = io_uring_submit(&dev->ring); > + if (ret < 0) { > + ublk_err("uring submit ret %d\n", ret); > + return ret; > + } > + > + ret = io_uring_wait_cqe(&dev->ring, &cqe); > + if (ret < 0) { > + ublk_err("wait cqe: %s\n", strerror(-ret)); > + return ret; > + } > + io_uring_cqe_seen(&dev->ring, cqe); > + > + return cqe->res; > +} > + > +int ublk_ctrl_stop_dev(struct ublk_dev *dev) > +{ > + struct ublk_ctrl_cmd_data data = { > + .cmd_op = UBLK_CMD_STOP_DEV, > + }; > + int ret; > + > + ret = __ublk_ctrl_cmd(dev, &data); > + return ret; you can just return directly from here and remove local var ret.. > +} > + > +int ublk_ctrl_start_dev(struct ublk_dev *dev, > + int daemon_pid) > +{ > + struct ublk_ctrl_cmd_data data = { > + .cmd_op = UBLK_CMD_START_DEV, > + .flags = CTRL_CMD_HAS_DATA, > + }; > + int ret; > + > + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; > + > + ret = __ublk_ctrl_cmd(dev, &data); > + > + return ret; same here as previous comment ... 
> +} > + > +int ublk_ctrl_add_dev(struct ublk_dev *dev) > +{ > + struct ublk_ctrl_cmd_data data = { > + .cmd_op = UBLK_CMD_ADD_DEV, > + .flags = CTRL_CMD_HAS_BUF, > + .addr = (__u64)&dev->dev_info, > + .len = sizeof(struct ublksrv_ctrl_dev_info), > + }; > + > + return __ublk_ctrl_cmd(dev, &data); > +} > + > +int ublk_ctrl_del_dev(struct ublk_dev *dev) > +{ > + struct ublk_ctrl_cmd_data data = { > + .cmd_op = UBLK_CMD_DEL_DEV, > + .flags = 0, > + }; > + > + return __ublk_ctrl_cmd(dev, &data); > +} > + > +int ublk_ctrl_get_info(struct ublk_dev *dev) > +{ > + struct ublk_ctrl_cmd_data data = { > + .cmd_op = UBLK_CMD_GET_DEV_INFO, > + .flags = CTRL_CMD_HAS_BUF, > + .addr = (__u64)&dev->dev_info, > + .len = sizeof(struct ublksrv_ctrl_dev_info), > + }; > + > + return __ublk_ctrl_cmd(dev, &data); > +} > + > +int ublk_ctrl_set_params(struct ublk_dev *dev, > + struct ublk_params *params) > +{ > + struct ublk_ctrl_cmd_data data = { > + .cmd_op = UBLK_CMD_SET_PARAMS, > + .flags = CTRL_CMD_HAS_BUF, > + .addr = (__u64)params, > + .len = sizeof(*params), > + }; > + params->len = sizeof(*params); > + return __ublk_ctrl_cmd(dev, &data); > +} > + > +static int ublk_ctrl_get_params(struct ublk_dev *dev, > + struct ublk_params *params) > +{ > + struct ublk_ctrl_cmd_data data = { > + .cmd_op = UBLK_CMD_GET_PARAMS, > + .flags = CTRL_CMD_HAS_BUF, > + .addr = (__u64)params, > + .len = sizeof(*params), > + }; > + int ret; > + > + params->len = sizeof(*params); > + > + ret = __ublk_ctrl_cmd(dev, &data); > + return ret; same here as previous comment ... 
> +} > + > +static const char *ublk_dev_state_desc(struct ublk_dev *dev) > +{ > + switch (dev->dev_info.state) { > + case UBLK_S_DEV_DEAD: > + return "DEAD"; > + case UBLK_S_DEV_LIVE: > + return "LIVE"; > + default: > + return "UNKNOWN"; > + }; > +} > + > +static void ublk_ctrl_dump(struct ublk_dev *dev, bool show_queue) > +{ > + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > + int ret; > + struct ublk_params p; > + > + ret = ublk_ctrl_get_params(dev, &p); > + if (ret < 0) { > + ublk_err("failed to get params %m\n"); > + return; > + } > + > + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", > + info->dev_id, > + info->nr_hw_queues, info->queue_depth, > + 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); > + ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", > + info->max_io_buf_bytes, > + info->ublksrv_pid, info->flags, > + ublk_dev_state_desc(dev)); > + if (show_queue) { > + int i; > + > + for (i = 0; i < dev->dev_info.nr_hw_queues; i++) > + ublk_log("\tqueue 0 tid: %d\n", dev->q[i].tid); > + } > + fflush(stdout); > +} > + > +static void ublk_ctrl_deinit(struct ublk_dev *dev) > +{ > + close(dev->ctrl_fd); > + free(dev); > +} > + > +static struct ublk_dev *ublk_ctrl_init() perhaps specify void -> ublk_ctrl_init(void) > +{ > + struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev)); > + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > + int ret; > + > + dev->ctrl_fd = open(CTRL_DEV, O_RDWR); > + if (dev->ctrl_fd < 0) { > + ublk_err("control dev %s can't be opened: %m\n", CTRL_DEV); perhaps also print the errno that is set by the open system call unless ublk_err is doing that implicitly, which I'm not sure about. also I didn't understand two format specifiers for above call and only one argument, it's a bit confusing to read the code... 
> + exit(dev->ctrl_fd); > + } > + info->max_io_buf_bytes = UBLK_IO_MAX_BYTES; > + > + ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH, > + UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128); > + if (ret < 0) { > + ublk_err("queue_init: %s\n", strerror(-ret)); > + free(dev); > + return NULL; > + } > + dev->nr_fds = 1; > + > + return dev; > +} > + > +static int ublk_queue_cmd_buf_sz(struct ublk_queue *q) > +{ > + int size = q->q_depth * sizeof(struct ublksrv_io_desc); > + unsigned int page_sz = getpagesize(); > + > + return round_up(size, page_sz); > +} > + > +static void ublk_queue_deinit(struct ublk_queue *q) > +{ > + int i; > + int nr_ios = q->q_depth; > + > + io_uring_unregister_ring_fd(&q->ring); > + > + if (q->ring.ring_fd > 0) { > + io_uring_unregister_files(&q->ring); > + close(q->ring.ring_fd); > + q->ring.ring_fd = -1; > + } > + > + if (q->io_cmd_buf) > + munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q)); > + > + for (i = 0; i < nr_ios; i++) > + free(q->ios[i].buf_addr); > +} > + > +static int ublk_queue_init(struct ublk_queue *q) > +{ > + struct ublk_dev *dev = q->dev; > + int depth = dev->dev_info.queue_depth; > + int i, ret = -1; > + int cmd_buf_size, io_buf_size; > + unsigned long off; > + int ring_depth = depth, cq_depth = depth; > + > + q->tgt_ops = dev->tgt.ops; > + q->state = 0; > + q->q_depth = depth; > + q->cmd_inflight = 0; > + q->tid = gettid(); > + > + cmd_buf_size = ublk_queue_cmd_buf_sz(q); > + off = UBLKSRV_CMD_BUF_OFFSET + > + q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc)); > + q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ, > + MAP_SHARED | MAP_POPULATE, dev->fds[0], off); > + if (q->io_cmd_buf == MAP_FAILED) { > + ublk_err("ublk dev %d queue %d map io_cmd_buf failed\n", > + q->dev->dev_info.dev_id, q->q_id); also add strerror(errno) to message ? 
> + goto fail; > + } > + > + io_buf_size = dev->dev_info.max_io_buf_bytes; > + for (i = 0; i < q->q_depth; i++) { > + q->ios[i].buf_addr = NULL; > + > + if (posix_memalign((void **)&q->ios[i].buf_addr, > + getpagesize(), io_buf_size)) { > + ublk_err("ublk dev %d queue %d io %d posix_memalign failed\n", > + dev->dev_info.dev_id, q->q_id, i); > + goto fail; > + } same as previous comment .. > + q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE; > + } > + > + ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, > + IORING_SETUP_SQE128 | IORING_SETUP_COOP_TASKRUN); > + if (ret < 0) { > + ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", > + q->dev->dev_info.dev_id, q->q_id, ret); > + goto fail; > + } > + > + io_uring_register_ring_fd(&q->ring); > + > + ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds); > + if (ret) { > + ublk_err("ublk dev %d queue %d register files failed %d\n", > + q->dev->dev_info.dev_id, q->q_id, ret); > + goto fail; > + } > + > + return 0; > + fail: > + ublk_queue_deinit(q); > + ublk_err("ublk dev %d queue %d failed\n", > + dev->dev_info.dev_id, q->q_id); > + return -ENOMEM; > +} > + > +static int ublk_dev_prep(struct ublk_dev *dev) > +{ > + int dev_id = dev->dev_info.dev_id; > + char buf[64]; > + int ret; > + > + snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id); > + dev->fds[0] = open(buf, O_RDWR); > + if (dev->fds[0] < 0) { > + ret = -EBADF; > + ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]); > + goto fail; > + } > + > + if (dev->tgt.ops->init_tgt) > + ret = dev->tgt.ops->init_tgt(dev); > + else > + ret = 0; > + if ret is initialized to 0 at the time of declaration in the above function, we can safely get rid of the else above ? 
> + return ret; > +fail: > + close(dev->fds[0]); > + return ret; > +} > + > +static void ublk_dev_unprep(struct ublk_dev *dev) > +{ > + if (dev->tgt.ops->deinit_tgt) > + dev->tgt.ops->deinit_tgt(dev); > + close(dev->fds[0]); > +} > + > +static int ublk_queue_io_cmd(struct ublk_queue *q, > + struct ublk_io *io, unsigned tag) > +{ > + struct ublksrv_io_cmd *cmd; > + struct io_uring_sqe *sqe; > + unsigned int cmd_op = 0; > + __u64 user_data; > + > + /* only freed io can be issued */ > + if (!(io->flags & UBLKSRV_IO_FREE)) > + return 0; > + > + /* we issue because we need either fetching or committing */ > + if (!(io->flags & > + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) > + return 0; > + > + if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) > + cmd_op = UBLK_IO_COMMIT_AND_FETCH_REQ; > + else if (io->flags & UBLKSRV_NEED_FETCH_RQ) > + cmd_op = UBLK_IO_FETCH_REQ; > + > + sqe = io_uring_get_sqe(&q->ring); > + if (!sqe) { > + ublk_err("%s: run out of sqe %d, tag %d\n", > + __func__, q->q_id, tag); > + return -1; > + } > + > + cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe); > + > + if (cmd_op == UBLK_IO_COMMIT_AND_FETCH_REQ) > + cmd->result = io->result; > + > + /* These fields should be written once, never change */ > + ublk_set_sqe_cmd_op(sqe, cmd_op); > + sqe->fd = 0; /* dev->fds[0] */ > + sqe->opcode = IORING_OP_URING_CMD; > + sqe->flags = IOSQE_FIXED_FILE; > + sqe->rw_flags = 0; > + cmd->tag = tag; > + cmd->addr = (__u64)io->buf_addr; > + cmd->q_id = q->q_id; > + > + user_data = build_user_data(tag, cmd_op, 0, 0); > + io_uring_sqe_set_data64(sqe, user_data); > + > + io->flags = 0; > + > + q->cmd_inflight += 1; > + > + ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n", > + __func__, q->q_id, tag, cmd_op, > + io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING)); > + return 1; > +} > + > +static int ublk_complete_io(struct ublk_queue *q, > + unsigned tag, int res) > +{ > + struct ublk_io *io = &q->ios[tag]; > + > + 
ublk_mark_io_done(io, res); > + > + return ublk_queue_io_cmd(q, io, tag); > +} > + > +static void ublk_submit_fetch_commands(struct ublk_queue *q) > +{ > + int i = 0; > + > + for (i = 0; i < q->q_depth; i++) > + ublk_queue_io_cmd(q, &q->ios[i], i); > +} > + > +static int ublk_queue_is_idle(struct ublk_queue *q) > +{ > + return !io_uring_sq_ready(&q->ring) && !q->io_inflight; > +} > + > +static int ublk_queue_is_done(struct ublk_queue *q) > +{ > + return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q); > +} > + > +static void ublk_queue_discard_io_pages(struct ublk_queue *q) > +{ > + const struct ublk_dev *dev = q->dev; > + unsigned int io_buf_size = dev->dev_info.max_io_buf_bytes; > + int i = 0; > + > + for (i = 0; i < q->q_depth; i++) > + madvise(q->ios[i].buf_addr, io_buf_size, MADV_DONTNEED); > +} > + > +static void ublk_queue_idle_enter(struct ublk_queue *q) > +{ > + if (q->state & UBLKSRV_QUEUE_IDLE) > + return; > + > + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: enter idle %x\n", > + q->dev->dev_info.dev_id, q->q_id, q->state); > + ublk_queue_discard_io_pages(q); > + q->state |= UBLKSRV_QUEUE_IDLE; > +} > + > +static void ublk_queue_idle_exit(struct ublk_queue *q) > +{ > + if (q->state & UBLKSRV_QUEUE_IDLE) { > + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: exit idle %x\n", > + q->dev->dev_info.dev_id, q->q_id, q->state); > + q->state &= ~UBLKSRV_QUEUE_IDLE; > + } > +} > + > +static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, > + struct io_uring_cqe *cqe) > +{ > + unsigned tag = user_data_to_tag(cqe->user_data); > + > + if (cqe->res < 0 && cqe->res != -EAGAIN) > + ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", > + __func__, cqe->res, q->q_id, > + user_data_to_tag(cqe->user_data), > + user_data_to_op(cqe->user_data)); > + > + if (q->tgt_ops->tgt_io_done) > + q->tgt_ops->tgt_io_done(q, tag, cqe); > +} > + > +static void ublk_handle_cqe(struct io_uring *r, > + struct io_uring_cqe *cqe, void *data) > +{ > + struct ublk_queue *q = 
container_of(r, struct ublk_queue, ring); > + unsigned tag = user_data_to_tag(cqe->user_data); > + unsigned cmd_op = user_data_to_op(cqe->user_data); > + int fetch = (cqe->res != UBLK_IO_RES_ABORT) && > + !(q->state & UBLKSRV_QUEUE_STOPPING); > + struct ublk_io *io; > + > + ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n", > + __func__, cqe->res, q->q_id, tag, cmd_op, > + is_target_io(cqe->user_data), > + (q->state & UBLKSRV_QUEUE_STOPPING)); > + > + /* Don't retrieve io in case of target io */ > + if (is_target_io(cqe->user_data)) { > + ublksrv_handle_tgt_cqe(q, cqe); > + return; > + } > + > + io = &q->ios[tag]; > + q->cmd_inflight--; > + > + if (!fetch) { > + q->state |= UBLKSRV_QUEUE_STOPPING; > + io->flags &= ~UBLKSRV_NEED_FETCH_RQ; > + } > + > + if (cqe->res == UBLK_IO_RES_OK) { > + ublk_assert(tag < q->q_depth); > + q->tgt_ops->queue_io(q, tag); > + } else { > + /* > + * COMMIT_REQ will be completed immediately since no fetching > + * piggyback is required. > + * > + * Marking IO_FREE only, then this io won't be issued since > + * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*) > + * > + * */ > + io->flags = UBLKSRV_IO_FREE; > + } if you can return; from above if we can remove the else, no biggy though.. > +} > + > +static int ublk_reap_events_uring(struct io_uring *r) > +{ > + struct io_uring_cqe *cqe; > + unsigned head; > + int count = 0; > + > + io_uring_for_each_cqe(r, head, cqe) { > + ublk_handle_cqe(r, cqe, NULL); > + count += 1; > + } > + io_uring_cq_advance(r, count); > + > + return count; > +} > + > +static int ublk_process_io(struct ublk_queue *q) > +{ > + int ret, reapped; > + struct __kernel_timespec ts = { > + .tv_sec = UBLKSRV_IO_IDLE_SECS, > + .tv_nsec = 0 > + }; > + struct __kernel_timespec *tsp = (q->state & UBLKSRV_QUEUE_IDLE) ? 
> + NULL : &ts; > + struct io_uring_cqe *cqe; > + > + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n", > + q->dev->dev_info.dev_id, > + q->q_id, io_uring_sq_ready(&q->ring), > + q->cmd_inflight, > + (q->state & UBLKSRV_QUEUE_STOPPING)); > + > + if (ublk_queue_is_done(q)) > + return -ENODEV; > + > + ret = io_uring_submit_and_wait_timeout(&q->ring, &cqe, 1, tsp, NULL); > + reapped = ublk_reap_events_uring(&q->ring); > + > + ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n", > + ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING), > + (q->state & UBLKSRV_QUEUE_IDLE)); > + > + if (!(q->state & UBLKSRV_QUEUE_STOPPING)) { > + if (ret == -ETIME && reapped == 0 && ublk_queue_is_idle(q)) > + ublk_queue_idle_enter(q); > + else > + ublk_queue_idle_exit(q); > + } > + return reapped; > +} > + > +static void *ublk_io_handler_fn(void *data) > +{ > + struct ublk_queue *q = data; > + int dev_id = q->dev->dev_info.dev_id; > + int ret; > + > + ret = ublk_queue_init(q); > + if (ret) { > + ublk_err("ublk dev %d queue %d init queue failed\n", > + dev_id, q->q_id); > + return NULL; > + } > + > + /* submit all io commands to ublk driver */ > + ublk_submit_fetch_commands(q); > + > + ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", > + gettid(), > + dev_id, q->q_id); > + do { > + if (ublk_process_io(q) < 0) > + break; > + } while (1); > + > + ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id); > + ublk_queue_deinit(q); > + return NULL; > +} > + > +static void ublk_set_parameters(struct ublk_dev *dev) > +{ > + int ret; > + > + ret = ublk_ctrl_set_params(dev, &dev->tgt.params); > + if (ret) > + ublk_err("dev %d set basic parameter failed %d\n", > + dev->dev_info.dev_id, ret); > +} > + > +static int ublk_start_daemon(struct ublk_dev *dev) > +{ > + int ret, i; > + void *thread_ret; > + const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; > + > + daemon(1, 1); > + > + ublk_dbg(UBLK_DBG_DEV, 
"%s enter\n", __func__); > + > + ret = ublk_dev_prep(dev); > + if (!dev) > + return -ENOMEM; > + can dev be NULL ever here ? do we need to check ret instead of dev? (I don't think so since looking at ublk_dev_prep() it will not change dev value and it is unconditionally dereferenced before its use). also shouldn't we return ret here as ublk_dev_prep() will return non -ENOMEM value ? > + for (i = 0; i < dinfo->nr_hw_queues; i++) { > + dev->q[i].dev = dev; > + dev->q[i].q_id = i; > + pthread_create(&dev->q[i].thread, NULL, > + ublk_io_handler_fn, > + &dev->q[i]); > + } > + > + ublk_set_parameters(dev); > + > + /* everything is fine now, start us */ > + ret = ublk_ctrl_start_dev(dev, getpid()); > + if (ret < 0) > + goto fail; > + > + ublk_ctrl_get_info(dev); > + ublk_ctrl_dump(dev, true); > + > + /* wait until we are terminated */ > + for (i = 0; i < dinfo->nr_hw_queues; i++) > + pthread_join(dev->q[i].thread, &thread_ret); > + fail: > + ublk_dev_unprep(dev); > + ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__); > + > + return ret; > +} > + > +static int cmd_dev_add(int argc, char *argv[]) > +{ > + static const struct option longopts[] = { > + { "type", 1, NULL, 't' }, > + { "number", 1, NULL, 'n' }, > + { "queues", 1, NULL, 'q' }, > + { "depth", 1, NULL, 'd' }, > + { "debug_mask", 1, NULL, 0}, > + { "quiet", 0, NULL, 0}, > + { NULL } > + }; > + const struct ublk_tgt_ops *ops; > + struct ublksrv_ctrl_dev_info *info; > + struct ublk_dev *dev; > + int ret, option_idx, opt; > + const char *tgt_type = NULL; > + int dev_id = -1; > + unsigned nr_queues = 2, depth = UBLK_QUEUE_DEPTH; > + > + while ((opt = getopt_long(argc, argv, "-:t:n:d:q:", > + longopts, &option_idx)) != -1) { > + switch (opt) { > + case 'n': > + dev_id = strtol(optarg, NULL, 10); > + break; > + case 't': > + tgt_type = optarg; > + break; > + case 'q': > + nr_queues = strtol(optarg, NULL, 10); > + break; > + case 'd': > + depth = strtol(optarg, NULL, 10); > + break; > + case 0: > + if 
(!strcmp(longopts[option_idx].name, "debug_mask")) > + ublk_dbg_mask = strtol(optarg, NULL, 16); > + if (!strcmp(longopts[option_idx].name, "quiet")) > + ublk_dbg_mask = 0; > + break; > + } > + } > + > + optind = 0; > + > + ops = ublk_find_tgt(tgt_type); > + if (!ops) { > + ublk_err("%s: no such tgt type, type %s\n", > + __func__, tgt_type); > + return -ENODEV; > + } > + > + if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) { > + ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n", > + __func__, nr_queues, depth); > + return -EINVAL; > + } > + > + dev = ublk_ctrl_init(); > + if (!dev) { > + ublk_err("%s: can't alloc dev id %d, type %s\n", > + __func__, dev_id, tgt_type); > + return -ENOMEM; > + } > + > + info = &dev->dev_info; > + info->dev_id = dev_id; > + info->nr_hw_queues = nr_queues; > + info->queue_depth = depth; > + dev->tgt.ops = ops; > + dev->tgt.argc = argc; > + dev->tgt.argv = argv; > + > + ret = ublk_ctrl_add_dev(dev); > + if (ret < 0) { > + ublk_err("%s: can't add dev id %d, type %s ret %d\n", > + __func__, dev_id, tgt_type, ret); > + goto fail; > + } > + > + ret = ublk_start_daemon(dev); > + if (ret < 0) { > + ublk_err("%s: can't start daemon id %d, type %s\n", > + __func__, dev_id, tgt_type); > + goto fail_del; > + } > +fail_del: > + ublk_ctrl_del_dev(dev); > +fail: > + ublk_ctrl_deinit(dev); > + return ret; > +} > + > +static int ublk_stop_io_daemon(const struct ublk_dev *dev) > +{ > + int daemon_pid = dev->dev_info.ublksrv_pid; > + int cnt = 0, ret; > + > + if (daemon_pid == -1) > + return 0; > + > + /* wait until daemon is exited, or timeout after 3 seconds */ > + do { > + ret = kill(daemon_pid, 0); > + if (ret) > + break; > + usleep(500000); > + cnt++; > + } while (!ret && cnt < 6); > + > + ublk_dbg(UBLK_DBG_DEV, "%s: pid %d ret %d\n", __func__, daemon_pid, ret); > + > + return ret != 0 ? 
0 : -1; > +} > + > +static int __cmd_dev_del(int number, bool log) > +{ > + struct ublk_dev *dev; > + int ret; > + > + dev = ublk_ctrl_init(); > + dev->dev_info.dev_id = number; > + > + ret = ublk_ctrl_get_info(dev); > + if (ret < 0) { > + goto fail; > + } > + > + ret = ublk_ctrl_stop_dev(dev); > + if (ret < 0) { > + if (log) > + ublk_err("stop dev %d failed\n", number); > + goto fail; > + } > + > + ret = ublk_stop_io_daemon(dev); > + if (ret < 0) { > + if (log) > + ublk_err("stop daemon %d failed\n", number); > + } > + > + ublk_ctrl_del_dev(dev); > +fail: > + ublk_ctrl_deinit(dev); > + return ret; > +} > + > +static int cmd_dev_del(int argc, char *argv[]) > +{ > + static const struct option longopts[] = { > + { "number", 1, NULL, 'n' }, > + { "all", 0, NULL, 'a' }, > + { "debug_mask", 1, NULL, 0}, > + { NULL } > + }; > + int number = -2; > + int opt, i, option_idx; > + > + while ((opt = getopt_long(argc, argv, "n:a", > + longopts, &option_idx)) != -1) { > + switch (opt) { > + case 'a': > + number = -1; > + break; > + > + case 'n': > + number = strtol(optarg, NULL, 10); > + break; > + case 0: > + if (!strcmp(longopts[option_idx].name, "debug_mask")) > + ublk_dbg_mask = strtol(optarg, NULL, 16); > + break; > + } > + } > + > + if (number >= 0) > + return __cmd_dev_del(number, true); > + else if (number != -1) { > + ublk_err("%s: pass wrong devid or not delete via -a\n"); > + return -EINVAL; > + } > + > + for (i = 0; i < 255; i++) > + __cmd_dev_del(i, false); > + > + return 0; > +} > + > +static int __cmd_dev_list(int number, bool log) > +{ > + struct ublk_dev *dev = ublk_ctrl_init(); > + int ret; > + > + dev->dev_info.dev_id = number; > + > + ret = ublk_ctrl_get_info(dev); > + if (ret < 0) { > + if (log) > + ublk_err("%s: can't get dev info from %d: %d\n", > + __func__, number, ret); > + } else { > + ublk_ctrl_dump(dev, false); > + } > + > + ublk_ctrl_deinit(dev); > + > + return ret; > +} > + > + > +static int cmd_dev_list(int argc, char *argv[]) > +{ > + static 
const struct option longopts[] = { > + { "number", 1, NULL, 'n' }, > + { "all", 0, NULL, 'a' }, > + { NULL } > + }; > + int number = -1; > + int opt, i; > + > + while ((opt = getopt_long(argc, argv, "n:a", > + longopts, NULL)) != -1) { > + switch (opt) { > + case 'a': > + break; > + > + case 'n': > + number = strtol(optarg, NULL, 10); > + break; > + } > + } > + > + if (number >= 0) > + return __cmd_dev_list(number, true); > + > + for (i = 0; i < 255; i++) > + __cmd_dev_list(i, false); > + > + return 0; > +} > + > +static int cmd_dev_help(int argc, char *argv[]) > +{ > + printf("%s add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] \n", > + argv[0]); > + printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); > + printf("\t -t loop -f backing_file \n"); > + printf("\t -t null\n"); > + printf("%s del [-n dev_id] -a \n", argv[0]); > + printf("\t -a delete all devices -n delete specified device\n"); > + printf("%s list [-n dev_id] -a \n", argv[0]); > + printf("\t -a list all devices, -n list specified device, default -a \n"); > + > + return 0; > +} > + > +static int ublk_null_tgt_init(struct ublk_dev *dev) > +{ > + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > + unsigned long dev_size = 250UL << 30; > + > + dev->tgt.dev_size = dev_size; > + dev->tgt.params = (struct ublk_params) { > + .types = UBLK_PARAM_TYPE_BASIC, > + .basic = { > + .logical_bs_shift = 9, > + .physical_bs_shift = 12, > + .io_opt_shift = 12, > + .io_min_shift = 9, > + .max_sectors = info->max_io_buf_bytes >> 9, > + .dev_sectors = dev_size >> 9, > + }, > + }; > + > + return 0; > +} > + > +static int ublk_null_queue_io(struct ublk_queue *q, int tag) > +{ > + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); > + > + ublk_complete_io(q, tag, iod->nr_sectors << 9); > + > + return 0; > +} > + > +static int loop_queue_tgt_io(struct ublk_queue *q, int tag) > +{ > + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); > + struct io_uring_sqe 
*sqe = io_uring_get_sqe(&q->ring); > + unsigned ublk_op = ublksrv_get_op(iod); > + > + if (!sqe) > + return -ENOMEM; > + > + switch (ublk_op) { > + case UBLK_IO_OP_FLUSH: > + io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/, > + iod->nr_sectors << 9, > + iod->start_sector << 9, > + IORING_FSYNC_DATASYNC); > + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); > + break; > + case UBLK_IO_OP_WRITE_ZEROES: > + case UBLK_IO_OP_DISCARD: > + return -ENOTSUP; > + case UBLK_IO_OP_READ: > + io_uring_prep_read(sqe, 1 /*fds[1]*/, > + (void *)iod->addr, > + iod->nr_sectors << 9, > + iod->start_sector << 9); > + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); > + break; > + case UBLK_IO_OP_WRITE: > + io_uring_prep_write(sqe, 1 /*fds[1]*/, > + (void *)iod->addr, > + iod->nr_sectors << 9, > + iod->start_sector << 9); > + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); > + break; > + default: > + return -EINVAL; > + } > + > + q->io_inflight++; > + /* bit63 marks us as tgt io */ > + sqe->user_data = build_user_data(tag, ublk_op, 0, 1); > + > + ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag, > + iod->op_flags, iod->start_sector, iod->nr_sectors << 9); > + return 1; > +} > + > +static int ublk_loop_queue_io(struct ublk_queue *q, int tag) > +{ > + int queued = loop_queue_tgt_io(q, tag); > + > + if (queued < 0) > + ublk_complete_io(q, tag, queued); > + > + return 0; > +} > + > +static void ublk_loop_io_done(struct ublk_queue *q, int tag, > + const struct io_uring_cqe *cqe) > +{ > + int cqe_tag = user_data_to_tag(cqe->user_data); > + > + ublk_assert(tag == cqe_tag); > + ublk_complete_io(q, tag, cqe->res); > + q->io_inflight--; > +} > + > +static void ublk_loop_tgt_deinit(struct ublk_dev *dev) > +{ > + fsync(dev->fds[1]); > + close(dev->fds[1]); > +} > + > +static int ublk_loop_tgt_init(struct ublk_dev *dev) > +{ > + static const struct option lo_longopts[] = { > + { "file", 1, NULL, 'f' }, > + { NULL } > + }; > + unsigned long long bytes; > + char **argv = 
dev->tgt.argv; > + int argc = dev->tgt.argc; > + char *file = NULL; > + struct stat st; > + int fd, opt; > + struct ublk_params p = { > + .types = UBLK_PARAM_TYPE_BASIC, > + .basic = { > + .logical_bs_shift = 9, > + .physical_bs_shift = 12, > + .io_opt_shift = 12, > + .io_min_shift = 9, > + .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, > + }, > + }; > + > + while ((opt = getopt_long(argc, argv, "-:f:", > + lo_longopts, NULL)) != -1) { > + switch (opt) { > + case 'f': > + file = strdup(optarg); > + break; > + } > + } > + > + ublk_dbg(UBLK_DBG_DEV, "%s: file %s\n", __func__, file); > + > + if (!file) > + return -EINVAL; > + > + fd = open(file, O_RDWR); > + if (fd < 0) { > + ublk_err( "%s: backing file %s can't be opened\n", > + __func__, file); > + return -EBADF; > + } > + > + if (fstat(fd, &st) < 0) { > + close(fd); > + return -EBADF; > + } > + > + if (S_ISBLK(st.st_mode)) { > + unsigned int bs, pbs; > + > + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) > + return -EBADF; > + if (ioctl(fd, BLKSSZGET, &bs) != 0) > + return -1; > + if (ioctl(fd, BLKPBSZGET, &pbs) != 0) > + return -1; > + p.basic.logical_bs_shift = ilog2(bs); > + p.basic.physical_bs_shift = ilog2(pbs); > + } else if (S_ISREG(st.st_mode)) { > + bytes = st.st_size; > + } else { > + bytes = 0; > + } > + > + if (fcntl(fd, F_SETFL, O_DIRECT)) { > + p.basic.logical_bs_shift = 9; > + p.basic.physical_bs_shift = 12; > + ublk_log("%s: ublk-loop fallback to buffered IO\n", __func__); > + } > + > + dev->tgt.dev_size = bytes; > + p.basic.dev_sectors = bytes >> 9; > + dev->fds[1] = fd; > + dev->nr_fds += 1; > + dev->tgt.params = p; > + > + return 0; > +} > + > +const struct ublk_tgt_ops tgt_ops_list[] = { > + { > + .name = "null", > + .init_tgt = ublk_null_tgt_init, > + .queue_io = ublk_null_queue_io, > + }, > + > + { > + .name = "loop", > + .init_tgt = ublk_loop_tgt_init, > + .deinit_tgt = ublk_loop_tgt_deinit, > + .queue_io = ublk_loop_queue_io, > + .tgt_io_done = ublk_loop_io_done, > + }, > +}; > + > +static 
const struct ublk_tgt_ops *ublk_find_tgt(const char *name) > +{ > + const struct ublk_tgt_ops *ops; > + int i; > + > + if (name == NULL) > + return NULL; > + > + for (i = 0; sizeof(tgt_ops_list) / sizeof(*ops); i++) > + if (strcmp(tgt_ops_list[i].name, name) == 0) > + return &tgt_ops_list[i]; > + return NULL; > +} > + > +int main(int argc, char *argv[]) > +{ > + const char *cmd = argv[1]; > + int ret = -EINVAL; > + > + if (argc == 1) > + goto out; > + > + if (!strcmp(cmd, "add")) > + ret = cmd_dev_add(argc, argv); > + else if (!strcmp(cmd, "del")) > + ret = cmd_dev_del(argc, argv); > + else if (!strcmp(cmd, "list")) > + ret = cmd_dev_list(argc, argv); > + else if (!strcmp(cmd, "help")) > + ret = cmd_dev_help(argc, argv); > +out: > + if (ret) > + cmd_dev_help(argc, argv); > + > + return ret; > +}
On 2023/2/20 11:46, Ming Lei wrote: [...] > > Signed-off-by: Ming Lei <ming.lei@redhat.com> > --- > src/.gitignore | 1 + > src/Makefile | 18 + > src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 1395 insertions(+) > create mode 100644 src/miniublk.c > > diff --git a/src/.gitignore b/src/.gitignore > index 355bed3..df7aff5 100644 > --- a/src/.gitignore > +++ b/src/.gitignore > @@ -8,3 +8,4 @@ > /sg/dxfer-from-dev > /sg/syzkaller1 > /zbdioctl > +/miniublk > diff --git a/src/Makefile b/src/Makefile > index 3b587f6..81c6541 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ > else echo "$(3)"; fi) > > +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ Hi Ming, It should be "\#include", not "#include". You are missing a "\". Regards, Zhang
On Fri, Feb 24, 2023 at 03:52:28PM +0800, Ziyang Zhang wrote: > On 2023/2/20 11:46, Ming Lei wrote: > > [...] > > > > > Signed-off-by: Ming Lei <ming.lei@redhat.com> > > --- > > src/.gitignore | 1 + > > src/Makefile | 18 + > > src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ > > 3 files changed, 1395 insertions(+) > > create mode 100644 src/miniublk.c > > > > diff --git a/src/.gitignore b/src/.gitignore > > index 355bed3..df7aff5 100644 > > --- a/src/.gitignore > > +++ b/src/.gitignore > > @@ -8,3 +8,4 @@ > > /sg/dxfer-from-dev > > /sg/syzkaller1 > > /zbdioctl > > +/miniublk > > diff --git a/src/Makefile b/src/Makefile > > index 3b587f6..81c6541 100644 > > --- a/src/Makefile > > +++ b/src/Makefile > > @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > > $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ > > else echo "$(3)"; fi) > > > > +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ > Hi Ming, > > It should be "\#include", not "#include". You miss a "\". "\#include" won't work for checking for the IORING_OP_URING_CMD macro: when run from a shell, the backslash is passed through literally and the include is never processed, as the output below shows. [root@ktest-36 linux]# echo "\#include <liburing.h>" | gcc -E - # 0 "<stdin>" # 0 "<built-in>" # 0 "<command-line>" # 1 "/usr/include/stdc-predef.h" 1 3 4 # 0 "<command-line>" 2 # 1 "<stdin>" \#include <liburing.h> Thanks, Ming
Hi Chaitanya, Thanks for the review! On Tue, Feb 21, 2023 at 07:58:48PM +0000, Chaitanya Kulkarni wrote: > On 2/19/2023 7:46 PM, Ming Lei wrote: > > Prepare for adding ublk related test: > > > > 1) ublk delete is sync removal, this way is convenient to > > blkg/queue/disk instance leak issue > > > > 2) mini ublk has two builtin target(null, loop), and loop IO is > > handled by io_uring, so we can use ublk to cover part of io_uring > > workloads > > > > 3) not like loop/nbd, ublk won't pre-allocate/add disk, and always > > add/delete disk dynamically, this way may cover disk plug & unplug > > tests > > > > 4) ublk specific test given people starts to use it, so better to > > let blktest cover ublk related tests > > > > Add mini ublk source for test purpose only, which is easy to use: > > > > ./miniublk add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] > > default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation) > > -t loop -f backing_file > > -t null > > ./miniublk del [-n dev_id] -a > > -a delete all devices, -n delete specified device > > ./miniublk list [-n dev_id] -a > > -a list all devices, -n list specified device, default -a > > > > miniublk depends on liburing 2.2, adds HAVE_LIBURING for checking if > > liburing 2.2 exists; also add HAVE_UBLK_HEADER for checking ublk kernel > > UAPI header exists. If either of two dependencies can't be met, simply > > ignore miniublk target. > > > > Also v6.0 is the 1st linux kernel release with ublk. 
> > > > Signed-off-by: Ming Lei <ming.lei@redhat.com> > > --- > > src/.gitignore | 1 + > > src/Makefile | 18 + > > src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ > > 3 files changed, 1395 insertions(+) > > create mode 100644 src/miniublk.c > > > > diff --git a/src/.gitignore b/src/.gitignore > > index 355bed3..df7aff5 100644 > > --- a/src/.gitignore > > +++ b/src/.gitignore > > @@ -8,3 +8,4 @@ > > /sg/dxfer-from-dev > > /sg/syzkaller1 > > /zbdioctl > > +/miniublk > > diff --git a/src/Makefile b/src/Makefile > > index 3b587f6..81c6541 100644 > > --- a/src/Makefile > > +++ b/src/Makefile > > @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > > $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ > > else echo "$(3)"; fi) > > > > +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ > > + $(CC) -E - 2>&1 /dev/null | grep $(2) > /dev/null 2>&1; \ > > + then echo 1;else echo 0; fi) > > + > > C_TARGETS := \ > > loblksize \ > > loop_change_fd \ > > @@ -13,16 +17,27 @@ C_TARGETS := \ > > sg/syzkaller1 \ > > zbdioctl > > > > +C_MINIUBLK := miniublk > > + > > +HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD) > > +HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1) > > + > > CXX_TARGETS := \ > > discontiguous-io > > > > +ifeq ($(HAVE_LIBURING)$(HAVE_UBLK_HEADER), 11) > > +TARGETS := $(C_TARGETS) $(CXX_TARGETS) $(C_MINIUBLK) > > +else > > +$(info Skip $(C_MINIUBLK) build due to missing kernel header(v6.0+) or liburing(2.2+)) > > TARGETS := $(C_TARGETS) $(CXX_TARGETS) > > +endif > > > > CONFIG_DEFS := $(call HAVE_C_HEADER,linux/blkzoned.h,-DHAVE_LINUX_BLKZONED_H) > > > > override CFLAGS := -O2 -Wall -Wshadow $(CFLAGS) $(CONFIG_DEFS) > > override CXXFLAGS := -O2 -std=c++11 -Wall -Wextra -Wshadow -Wno-sign-compare \ > > -Werror $(CXXFLAGS) $(CONFIG_DEFS) > > +MINIUBLK_FLAGS := -D_GNU_SOURCE -lpthread -luring > > > > all: $(TARGETS) > > > > @@ -39,4 +54,7 @@ $(C_TARGETS): %: %.c > > $(CXX_TARGETS): %: %.cpp > 
> $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ > > > > +$(C_MINIUBLK): %: miniublk.c > > + $(CC) $(CFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c > > + > > .PHONY: all clean install > > diff --git a/src/miniublk.c b/src/miniublk.c > > new file mode 100644 > > index 0000000..e84ba41 > > --- /dev/null > > +++ b/src/miniublk.c > > @@ -0,0 +1,1376 @@ > > +// SPDX-License-Identifier: GPL-3.0+ > > +// Copyright (C) 2023 Ming Lei > > + > > +/* > > + * io_uring based mini ublk implementation with null/loop target, > > + * for test purpose only. > > + * > > + * So please keep it clean & simple & reliable. > > + */ > > + > > +#include <unistd.h> > > +#include <stdlib.h> > > +#include <assert.h> > > +#include <stdio.h> > > +#include <stdarg.h> > > +#include <string.h> > > +#include <pthread.h> > > +#include <getopt.h> > > +#include <limits.h> > > +#include <sys/syscall.h> > > +#include <sys/mman.h> > > +#include <sys/ioctl.h> > > +#include <liburing.h> > > +#include <linux/ublk_cmd.h> > > + > > +#define CTRL_DEV "/dev/ublk-control" > > +#define UBLKC_DEV "/dev/ublkc" > > +#define UBLK_CTRL_RING_DEPTH 32 > > + > > +/* queue idle timeout */ > > +#define UBLKSRV_IO_IDLE_SECS 20 > > + > > +#define UBLK_IO_MAX_BYTES 65536 > > +#define UBLK_MAX_QUEUES 4 > > +#define UBLK_QUEUE_DEPTH 128 > > + > > +#define UBLK_DBG_DEV (1U << 0) > > +#define UBLK_DBG_QUEUE (1U << 1) > > +#define UBLK_DBG_IO_CMD (1U << 2) > > +#define UBLK_DBG_IO (1U << 3) > > +#define UBLK_DBG_CTRL_CMD (1U << 4) > > +#define UBLK_LOG (1U << 5) > > + > > +struct ublk_dev; > > +struct ublk_queue; > > + > > +struct ublk_ctrl_cmd_data { > > + unsigned short cmd_op; > > perhaps use enum type to avoid any type mismatach errors in future.. Sounds good. > > > +#define CTRL_CMD_HAS_DATA 1 > > +#define CTRL_CMD_HAS_BUF 2 > > + unsigned short flags; > > I'd keep falgs u64 or u32 just incase.. Fine. 
> > > + > > + __u64 data[2]; > > + __u64 addr; > > + __u32 len; > > +}; > > + > > +struct ublk_io { > > + char *buf_addr; > > + > > +#define UBLKSRV_NEED_FETCH_RQ (1UL << 0) > > +#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) > > +#define UBLKSRV_IO_FREE (1UL << 2) > > + unsigned int flags; > > + > > + unsigned int result; > > +}; > > + > > +struct ublk_tgt_ops { > > + const char *name; > > + int (*init_tgt)(struct ublk_dev *); > > + void (*deinit_tgt)(struct ublk_dev *); > > + > > + int (*queue_io)(struct ublk_queue *, int tag); > > + void (*tgt_io_done)(struct ublk_queue *, > > + int tag, const struct io_uring_cqe *); > > +}; > > + > > +struct ublk_tgt { > > + unsigned long dev_size; > > + const struct ublk_tgt_ops *ops; > > + int argc; > > + char **argv; > > + struct ublk_params params; > > +}; > > + > > +struct ublk_queue { > > + int q_id; > > + int q_depth; > > is there a reason to not use unsigned int for qid and qd above ? Because int is enough for holding nr_queues and depth. > > > + unsigned int cmd_inflight; > > + unsigned int io_inflight; > > + struct ublk_dev *dev; > > + const struct ublk_tgt_ops *tgt_ops; > > + char *io_cmd_buf; > > + struct io_uring ring; > > + struct ublk_io ios[UBLK_QUEUE_DEPTH]; > > +#define UBLKSRV_QUEUE_STOPPING (1U << 0) > > +#define UBLKSRV_QUEUE_IDLE (1U << 1) > > + unsigned state; > > + int tid; > > instead of int pid_t ? OK. 
> > > + pthread_t thread; > > +}; > > + > > +struct ublk_dev { > > + struct ublk_tgt tgt; > > + struct ublksrv_ctrl_dev_info dev_info; > > + struct ublk_queue q[UBLK_MAX_QUEUES]; > > + > > + int fds[2]; /* fds[0] points to /dev/ublkcN */ > > + int nr_fds; > > + int ctrl_fd; > > + struct io_uring ring; > > +}; > > + > > +#ifndef offsetof > > +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) > > +#endif > > + > > +#ifndef container_of > > +#define container_of(ptr, type, member) ({ \ > > + unsigned long __mptr = (unsigned long)(ptr); \ > > + ((type *)(__mptr - offsetof(type, member))); }) > > +#endif > > + > > +#define round_up(val, rnd) \ > > + (((val) + ((rnd) - 1)) & ~((rnd) - 1)) > > + > > +#define ublk_assert(x) do { \ > > + if (!(x)) { \ > > + ublk_err("%s %d: assert!\n", __func__, __LINE__); \ > > + assert(x); \ > > + } \ > > +} while (0) > > + > > +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name); > > + > > +static unsigned int ublk_dbg_mask = UBLK_LOG; > > + > > +static inline unsigned ilog2(unsigned x) > > +{ > > + return sizeof(unsigned) * 8 - 1 - __builtin_clz(x); > > +} > > + > > +static inline int is_target_io(__u64 user_data) > > +{ > > + return (user_data & (1ULL << 63)) != 0; > > +} > > + > > +static inline __u64 build_user_data(unsigned tag, unsigned op, > > + unsigned tgt_data, unsigned is_target_io) > > +{ > > + assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16)); > > + > > + return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63; > > +} > > + > > +static inline unsigned int user_data_to_tag(__u64 user_data) > > +{ > > + return user_data & 0xffff; > > +} > > + > > +static inline unsigned int user_data_to_op(__u64 user_data) > > +{ > > + return (user_data >> 16) & 0xff; > > +} > > + > > +static void ublk_err(const char *fmt, ...) > > +{ > > + va_list ap; > > + > > + va_start(ap, fmt); > > + vfprintf(stderr, fmt, ap); > > +} > > + > > +static void ublk_log(const char *fmt, ...) 
> > +{ > > + if (ublk_dbg_mask & UBLK_LOG) { > > + va_list ap; > > + > > + va_start(ap, fmt); > > + vfprintf(stdout, fmt, ap); > > + } > > +} > > + > > +static void ublk_dbg(int level, const char *fmt, ...) > > +{ > > + if (level & ublk_dbg_mask) { > > + va_list ap; > > + va_start(ap, fmt); > > + vfprintf(stdout, fmt, ap); > > + } > > +} > > + > > +static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) > > +{ > > + return (void *)&sqe->addr3; > > +} > > + > > +static inline void ublk_mark_io_done(struct ublk_io *io, int res) > > +{ > > + io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); > > + io->result = res; > > +} > > + > > +static inline const struct ublksrv_io_desc *ublk_get_iod( > > + const struct ublk_queue *q, int tag) > > +{ > > + return (struct ublksrv_io_desc *) > > + &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); > > +} > > + > > +static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, > > + __u32 cmd_op) > > +{ > > + __u32 *addr = (__u32 *)&sqe->off; > > + > > + addr[0] = cmd_op; > > + addr[1] = 0; > > +} > > + > > +static inline int ublk_setup_ring(struct io_uring *r, int depth, > > + int cq_depth, unsigned flags) > > +{ > > + struct io_uring_params p; > > + > > + memset(&p, 0, sizeof(p)); > > + p.flags = flags | IORING_SETUP_CQSIZE; > > + p.cq_entries = cq_depth; > > + > > + return io_uring_queue_init_params(depth, r, &p); > > +} > > + > > +static inline void ublk_ctrl_init_cmd(struct ublk_dev *dev, > > + struct io_uring_sqe *sqe, > > + struct ublk_ctrl_cmd_data *data) > > +{ > > + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > > + struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe); > > + > > + sqe->fd = dev->ctrl_fd; > > + sqe->opcode = IORING_OP_URING_CMD; > > + sqe->ioprio = 0; > > + > > + if (data->flags & CTRL_CMD_HAS_BUF) { > > + cmd->addr = data->addr; > > + cmd->len = data->len; > > + } > > + > > + if (data->flags & CTRL_CMD_HAS_DATA) > > + cmd->data[0] = 
data->data[0]; > > + > > + cmd->dev_id = info->dev_id; > > + cmd->queue_id = -1; > > + > > + ublk_set_sqe_cmd_op(sqe, data->cmd_op); > > + > > + io_uring_sqe_set_data(sqe, cmd); > > +} > > + > > +static int __ublk_ctrl_cmd(struct ublk_dev *dev, > > + struct ublk_ctrl_cmd_data *data) > > +{ > > + struct io_uring_sqe *sqe; > > + struct io_uring_cqe *cqe; > > + int ret = -EINVAL; > > + > > + sqe = io_uring_get_sqe(&dev->ring); > > + if (!sqe) { > > + ublk_err("%s: can't get sqe ret %d\n", __func__, ret); > > + return ret; > > + } > > + > > + ublk_ctrl_init_cmd(dev, sqe, data); > > + > > + ret = io_uring_submit(&dev->ring); > > + if (ret < 0) { > > + ublk_err("uring submit ret %d\n", ret); > > + return ret; > > + } > > + > > + ret = io_uring_wait_cqe(&dev->ring, &cqe); > > + if (ret < 0) { > > + ublk_err("wait cqe: %s\n", strerror(-ret)); > > + return ret; > > + } > > + io_uring_cqe_seen(&dev->ring, cqe); > > + > > + return cqe->res; > > +} > > + > > +int ublk_ctrl_stop_dev(struct ublk_dev *dev) > > +{ > > + struct ublk_ctrl_cmd_data data = { > > + .cmd_op = UBLK_CMD_STOP_DEV, > > + }; > > + int ret; > > + > > + ret = __ublk_ctrl_cmd(dev, &data); > > + return ret; > > you can just return directly from here and remove local var ret.. OK. > > > +} > > + > > +int ublk_ctrl_start_dev(struct ublk_dev *dev, > > + int daemon_pid) > > +{ > > + struct ublk_ctrl_cmd_data data = { > > + .cmd_op = UBLK_CMD_START_DEV, > > + .flags = CTRL_CMD_HAS_DATA, > > + }; > > + int ret; > > + > > + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; > > + > > + ret = __ublk_ctrl_cmd(dev, &data); > > + > > + return ret; > > same here as previous comment ... 
> > > +} > > + > > +int ublk_ctrl_add_dev(struct ublk_dev *dev) > > +{ > > + struct ublk_ctrl_cmd_data data = { > > + .cmd_op = UBLK_CMD_ADD_DEV, > > + .flags = CTRL_CMD_HAS_BUF, > > + .addr = (__u64)&dev->dev_info, > > + .len = sizeof(struct ublksrv_ctrl_dev_info), > > + }; > > + > > + return __ublk_ctrl_cmd(dev, &data); > > +} > > + > > +int ublk_ctrl_del_dev(struct ublk_dev *dev) > > +{ > > + struct ublk_ctrl_cmd_data data = { > > + .cmd_op = UBLK_CMD_DEL_DEV, > > + .flags = 0, > > + }; > > + > > + return __ublk_ctrl_cmd(dev, &data); > > +} > > + > > +int ublk_ctrl_get_info(struct ublk_dev *dev) > > +{ > > + struct ublk_ctrl_cmd_data data = { > > + .cmd_op = UBLK_CMD_GET_DEV_INFO, > > + .flags = CTRL_CMD_HAS_BUF, > > + .addr = (__u64)&dev->dev_info, > > + .len = sizeof(struct ublksrv_ctrl_dev_info), > > + }; > > + > > + return __ublk_ctrl_cmd(dev, &data); > > +} > > + > > +int ublk_ctrl_set_params(struct ublk_dev *dev, > > + struct ublk_params *params) > > +{ > > + struct ublk_ctrl_cmd_data data = { > > + .cmd_op = UBLK_CMD_SET_PARAMS, > > + .flags = CTRL_CMD_HAS_BUF, > > + .addr = (__u64)params, > > + .len = sizeof(*params), > > + }; > > + params->len = sizeof(*params); > > + return __ublk_ctrl_cmd(dev, &data); > > +} > > + > > +static int ublk_ctrl_get_params(struct ublk_dev *dev, > > + struct ublk_params *params) > > +{ > > + struct ublk_ctrl_cmd_data data = { > > + .cmd_op = UBLK_CMD_GET_PARAMS, > > + .flags = CTRL_CMD_HAS_BUF, > > + .addr = (__u64)params, > > + .len = sizeof(*params), > > + }; > > + int ret; > > + > > + params->len = sizeof(*params); > > + > > + ret = __ublk_ctrl_cmd(dev, &data); > > + return ret; > > same here as previous comment ... 
> > > +} > > + > > +static const char *ublk_dev_state_desc(struct ublk_dev *dev) > > +{ > > + switch (dev->dev_info.state) { > > + case UBLK_S_DEV_DEAD: > > + return "DEAD"; > > + case UBLK_S_DEV_LIVE: > > + return "LIVE"; > > + default: > > + return "UNKNOWN"; > > + }; > > +} > > + > > +static void ublk_ctrl_dump(struct ublk_dev *dev, bool show_queue) > > +{ > > + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > > + int ret; > > + struct ublk_params p; > > + > > + ret = ublk_ctrl_get_params(dev, &p); > > + if (ret < 0) { > > + ublk_err("failed to get params %m\n"); > > + return; > > + } > > + > > + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", > > + info->dev_id, > > + info->nr_hw_queues, info->queue_depth, > > + 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); > > + ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", > > + info->max_io_buf_bytes, > > + info->ublksrv_pid, info->flags, > > + ublk_dev_state_desc(dev)); > > + if (show_queue) { > > + int i; > > + > > + for (i = 0; i < dev->dev_info.nr_hw_queues; i++) > > + ublk_log("\tqueue 0 tid: %d\n", dev->q[i].tid); > > + } > > + fflush(stdout); > > +} > > + > > +static void ublk_ctrl_deinit(struct ublk_dev *dev) > > +{ > > + close(dev->ctrl_fd); > > + free(dev); > > +} > > + > > +static struct ublk_dev *ublk_ctrl_init() > > perhaps specify void -> ublk_ctrl_init(void) > > > +{ > > + struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev)); > > + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > > + int ret; > > + > > + dev->ctrl_fd = open(CTRL_DEV, O_RDWR); > > + if (dev->ctrl_fd < 0) { > > + ublk_err("control dev %s can't be opened: %m\n", CTRL_DEV); > > perhaps also print the errno that is set by the open system call unless > ublk_err is doing that implicitly which I'm not sure. > > also I didn't understand two format specifiers for above call and only > one argument its a bit confusing to read the code... 
%m is supposed to print message of errno, but it is fine to add the errno too. > > > + exit(dev->ctrl_fd); > > + } > > + info->max_io_buf_bytes = UBLK_IO_MAX_BYTES; > > + > > + ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH, > > + UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128); > > + if (ret < 0) { > > + ublk_err("queue_init: %s\n", strerror(-ret)); > > + free(dev); > > + return NULL; > > + } > > + dev->nr_fds = 1; > > + > > + return dev; > > +} > > + > > +static int ublk_queue_cmd_buf_sz(struct ublk_queue *q) > > +{ > > + int size = q->q_depth * sizeof(struct ublksrv_io_desc); > > + unsigned int page_sz = getpagesize(); > > + > > + return round_up(size, page_sz); > > +} > > + > > +static void ublk_queue_deinit(struct ublk_queue *q) > > +{ > > + int i; > > + int nr_ios = q->q_depth; > > + > > + io_uring_unregister_ring_fd(&q->ring); > > + > > + if (q->ring.ring_fd > 0) { > > + io_uring_unregister_files(&q->ring); > > + close(q->ring.ring_fd); > > + q->ring.ring_fd = -1; > > + } > > + > > + if (q->io_cmd_buf) > > + munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q)); > > + > > + for (i = 0; i < nr_ios; i++) > > + free(q->ios[i].buf_addr); > > +} > > + > > +static int ublk_queue_init(struct ublk_queue *q) > > +{ > > + struct ublk_dev *dev = q->dev; > > + int depth = dev->dev_info.queue_depth; > > + int i, ret = -1; > > + int cmd_buf_size, io_buf_size; > > + unsigned long off; > > + int ring_depth = depth, cq_depth = depth; > > + > > + q->tgt_ops = dev->tgt.ops; > > + q->state = 0; > > + q->q_depth = depth; > > + q->cmd_inflight = 0; > > + q->tid = gettid(); > > + > > + cmd_buf_size = ublk_queue_cmd_buf_sz(q); > > + off = UBLKSRV_CMD_BUF_OFFSET + > > + q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc)); > > + q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ, > > + MAP_SHARED | MAP_POPULATE, dev->fds[0], off); > > + if (q->io_cmd_buf == MAP_FAILED) { > > + ublk_err("ublk dev %d queue %d map io_cmd_buf failed\n", > > + q->dev->dev_info.dev_id, 
q->q_id); > > also add strerror(errno) to message ? %m could be easier. > > > + goto fail; > > + } > > + > > + io_buf_size = dev->dev_info.max_io_buf_bytes; > > + for (i = 0; i < q->q_depth; i++) { > > + q->ios[i].buf_addr = NULL; > > + > > + if (posix_memalign((void **)&q->ios[i].buf_addr, > > + getpagesize(), io_buf_size)) { > > + ublk_err("ublk dev %d queue %d io %d posix_memalign failed\n", > > + dev->dev_info.dev_id, q->q_id, i); > > + goto fail; > > + } > > same as previous comment .. OK. > > > + q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE; > > + } > > + > > + ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, > > + IORING_SETUP_SQE128 | IORING_SETUP_COOP_TASKRUN); > > + if (ret < 0) { > > + ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", > > + q->dev->dev_info.dev_id, q->q_id, ret); > > + goto fail; > > + } > > + > > + io_uring_register_ring_fd(&q->ring); > > + > > + ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds); > > + if (ret) { > > + ublk_err("ublk dev %d queue %d register files failed %d\n", > > + q->dev->dev_info.dev_id, q->q_id, ret); > > + goto fail; > > + } > > + > > + return 0; > > + fail: > > + ublk_queue_deinit(q); > > + ublk_err("ublk dev %d queue %d failed\n", > > + dev->dev_info.dev_id, q->q_id); > > + return -ENOMEM; > > +} > > + > > +static int ublk_dev_prep(struct ublk_dev *dev) > > +{ > > + int dev_id = dev->dev_info.dev_id; > > + char buf[64]; > > + int ret; > > + > > + snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id); > > + dev->fds[0] = open(buf, O_RDWR); > > + if (dev->fds[0] < 0) { > > + ret = -EBADF; > > + ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]); > > + goto fail; > > + } > > + > > + if (dev->tgt.ops->init_tgt) > > + ret = dev->tgt.ops->init_tgt(dev); > > + else > > + ret = 0; > > + > > if ret is initialize to 0 at the time of declaration in above function > we can safely get rid of else above ? OK. 
> > > + return ret; > > +fail: > > + close(dev->fds[0]); > > + return ret; > > +} > > + > > +static void ublk_dev_unprep(struct ublk_dev *dev) > > +{ > > + if (dev->tgt.ops->deinit_tgt) > > + dev->tgt.ops->deinit_tgt(dev); > > + close(dev->fds[0]); > > +} > > + > > +static int ublk_queue_io_cmd(struct ublk_queue *q, > > + struct ublk_io *io, unsigned tag) > > +{ > > + struct ublksrv_io_cmd *cmd; > > + struct io_uring_sqe *sqe; > > + unsigned int cmd_op = 0; > > + __u64 user_data; > > + > > + /* only freed io can be issued */ > > + if (!(io->flags & UBLKSRV_IO_FREE)) > > + return 0; > > + > > + /* we issue because we need either fetching or committing */ > > + if (!(io->flags & > > + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) > > + return 0; > > + > > + if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) > > + cmd_op = UBLK_IO_COMMIT_AND_FETCH_REQ; > > + else if (io->flags & UBLKSRV_NEED_FETCH_RQ) > > + cmd_op = UBLK_IO_FETCH_REQ; > > + > > + sqe = io_uring_get_sqe(&q->ring); > > + if (!sqe) { > > + ublk_err("%s: run out of sqe %d, tag %d\n", > > + __func__, q->q_id, tag); > > + return -1; > > + } > > + > > + cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe); > > + > > + if (cmd_op == UBLK_IO_COMMIT_AND_FETCH_REQ) > > + cmd->result = io->result; > > + > > + /* These fields should be written once, never change */ > > + ublk_set_sqe_cmd_op(sqe, cmd_op); > > + sqe->fd = 0; /* dev->fds[0] */ > > + sqe->opcode = IORING_OP_URING_CMD; > > + sqe->flags = IOSQE_FIXED_FILE; > > + sqe->rw_flags = 0; > > + cmd->tag = tag; > > + cmd->addr = (__u64)io->buf_addr; > > + cmd->q_id = q->q_id; > > + > > + user_data = build_user_data(tag, cmd_op, 0, 0); > > + io_uring_sqe_set_data64(sqe, user_data); > > + > > + io->flags = 0; > > + > > + q->cmd_inflight += 1; > > + > > + ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n", > > + __func__, q->q_id, tag, cmd_op, > > + io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING)); > > + return 1; > > +} > > + > > 
+static int ublk_complete_io(struct ublk_queue *q, > > + unsigned tag, int res) > > +{ > > + struct ublk_io *io = &q->ios[tag]; > > + > > + ublk_mark_io_done(io, res); > > + > > + return ublk_queue_io_cmd(q, io, tag); > > +} > > + > > +static void ublk_submit_fetch_commands(struct ublk_queue *q) > > +{ > > + int i = 0; > > + > > + for (i = 0; i < q->q_depth; i++) > > + ublk_queue_io_cmd(q, &q->ios[i], i); > > +} > > + > > +static int ublk_queue_is_idle(struct ublk_queue *q) > > +{ > > + return !io_uring_sq_ready(&q->ring) && !q->io_inflight; > > +} > > + > > +static int ublk_queue_is_done(struct ublk_queue *q) > > +{ > > + return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q); > > +} > > + > > +static void ublk_queue_discard_io_pages(struct ublk_queue *q) > > +{ > > + const struct ublk_dev *dev = q->dev; > > + unsigned int io_buf_size = dev->dev_info.max_io_buf_bytes; > > + int i = 0; > > + > > + for (i = 0; i < q->q_depth; i++) > > + madvise(q->ios[i].buf_addr, io_buf_size, MADV_DONTNEED); > > +} > > + > > +static void ublk_queue_idle_enter(struct ublk_queue *q) > > +{ > > + if (q->state & UBLKSRV_QUEUE_IDLE) > > + return; > > + > > + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: enter idle %x\n", > > + q->dev->dev_info.dev_id, q->q_id, q->state); > > + ublk_queue_discard_io_pages(q); > > + q->state |= UBLKSRV_QUEUE_IDLE; > > +} > > + > > +static void ublk_queue_idle_exit(struct ublk_queue *q) > > +{ > > + if (q->state & UBLKSRV_QUEUE_IDLE) { > > + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: exit idle %x\n", > > + q->dev->dev_info.dev_id, q->q_id, q->state); > > + q->state &= ~UBLKSRV_QUEUE_IDLE; > > + } > > +} > > + > > +static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, > > + struct io_uring_cqe *cqe) > > +{ > > + unsigned tag = user_data_to_tag(cqe->user_data); > > + > > + if (cqe->res < 0 && cqe->res != -EAGAIN) > > + ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", > > + __func__, cqe->res, q->q_id, > > + 
user_data_to_tag(cqe->user_data), > > + user_data_to_op(cqe->user_data)); > > + > > + if (q->tgt_ops->tgt_io_done) > > + q->tgt_ops->tgt_io_done(q, tag, cqe); > > +} > > + > > +static void ublk_handle_cqe(struct io_uring *r, > > + struct io_uring_cqe *cqe, void *data) > > +{ > > + struct ublk_queue *q = container_of(r, struct ublk_queue, ring); > > + unsigned tag = user_data_to_tag(cqe->user_data); > > + unsigned cmd_op = user_data_to_op(cqe->user_data); > > + int fetch = (cqe->res != UBLK_IO_RES_ABORT) && > > + !(q->state & UBLKSRV_QUEUE_STOPPING); > > + struct ublk_io *io; > > + > > + ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n", > > + __func__, cqe->res, q->q_id, tag, cmd_op, > > + is_target_io(cqe->user_data), > > + (q->state & UBLKSRV_QUEUE_STOPPING)); > > + > > + /* Don't retrieve io in case of target io */ > > + if (is_target_io(cqe->user_data)) { > > + ublksrv_handle_tgt_cqe(q, cqe); > > + return; > > + } > > + > > + io = &q->ios[tag]; > > + q->cmd_inflight--; > > + > > + if (!fetch) { > > + q->state |= UBLKSRV_QUEUE_STOPPING; > > + io->flags &= ~UBLKSRV_NEED_FETCH_RQ; > > + } > > + > > + if (cqe->res == UBLK_IO_RES_OK) { > > + ublk_assert(tag < q->q_depth); > > + q->tgt_ops->queue_io(q, tag); > > + } else { > > + /* > > + * COMMIT_REQ will be completed immediately since no fetching > > + * piggyback is required. > > + * > > + * Marking IO_FREE only, then this io won't be issued since > > + * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*) > > + * > > + * */ > > + io->flags = UBLKSRV_IO_FREE; > > + } > > if you can return; from above if we can remove the else, no biggy > though.. I think "else" is just fine. 
> > +} > > + > > +static int ublk_reap_events_uring(struct io_uring *r) > > +{ > > + struct io_uring_cqe *cqe; > > + unsigned head; > > + int count = 0; > > + > > + io_uring_for_each_cqe(r, head, cqe) { > > + ublk_handle_cqe(r, cqe, NULL); > > + count += 1; > > + } > > + io_uring_cq_advance(r, count); > > + > > + return count; > > +} > > + > > +static int ublk_process_io(struct ublk_queue *q) > > +{ > > + int ret, reapped; > > + struct __kernel_timespec ts = { > > + .tv_sec = UBLKSRV_IO_IDLE_SECS, > > + .tv_nsec = 0 > > + }; > > + struct __kernel_timespec *tsp = (q->state & UBLKSRV_QUEUE_IDLE) ? > > + NULL : &ts; > > + struct io_uring_cqe *cqe; > > + > > + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n", > > + q->dev->dev_info.dev_id, > > + q->q_id, io_uring_sq_ready(&q->ring), > > + q->cmd_inflight, > > + (q->state & UBLKSRV_QUEUE_STOPPING)); > > + > > + if (ublk_queue_is_done(q)) > > + return -ENODEV; > > + > > + ret = io_uring_submit_and_wait_timeout(&q->ring, &cqe, 1, tsp, NULL); > > + reapped = ublk_reap_events_uring(&q->ring); > > + > > + ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n", > > + ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING), > > + (q->state & UBLKSRV_QUEUE_IDLE)); > > + > > + if (!(q->state & UBLKSRV_QUEUE_STOPPING)) { > > + if (ret == -ETIME && reapped == 0 && ublk_queue_is_idle(q)) > > + ublk_queue_idle_enter(q); > > + else > > + ublk_queue_idle_exit(q); > > + } > > + return reapped; > > +} > > + > > +static void *ublk_io_handler_fn(void *data) > > +{ > > + struct ublk_queue *q = data; > > + int dev_id = q->dev->dev_info.dev_id; > > + int ret; > > + > > + ret = ublk_queue_init(q); > > + if (ret) { > > + ublk_err("ublk dev %d queue %d init queue failed\n", > > + dev_id, q->q_id); > > + return NULL; > > + } > > + > > + /* submit all io commands to ublk driver */ > > + ublk_submit_fetch_commands(q); > > + > > + ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", > > + 
gettid(), > > + dev_id, q->q_id); > > + do { > > + if (ublk_process_io(q) < 0) > > + break; > > + } while (1); > > + > > + ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id); > > + ublk_queue_deinit(q); > > + return NULL; > > +} > > + > > +static void ublk_set_parameters(struct ublk_dev *dev) > > +{ > > + int ret; > > + > > + ret = ublk_ctrl_set_params(dev, &dev->tgt.params); > > + if (ret) > > + ublk_err("dev %d set basic parameter failed %d\n", > > + dev->dev_info.dev_id, ret); > > +} > > + > > +static int ublk_start_daemon(struct ublk_dev *dev) > > +{ > > + int ret, i; > > + void *thread_ret; > > + const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; > > + > > + daemon(1, 1); > > + > > + ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); > > + > > + ret = ublk_dev_prep(dev); > > + if (!dev) > > + return -ENOMEM; > > + > > can dev be NULL ever here ? do we need to check ret instaed of dev? > (I don't think so since looking it ublk_dev_prep() it will not change > dev value and it is unconditionally deferenced before it's use). > > also shuldn't we return ret here as ublk_dev_prep() will retrun non > -ENOMEM value ? Good catch, it is really a bug. 
> > > + for (i = 0; i < dinfo->nr_hw_queues; i++) { > > + dev->q[i].dev = dev; > > + dev->q[i].q_id = i; > > + pthread_create(&dev->q[i].thread, NULL, > > + ublk_io_handler_fn, > > + &dev->q[i]); > > + } > > + > > + ublk_set_parameters(dev); > > + > > + /* everything is fine now, start us */ > > + ret = ublk_ctrl_start_dev(dev, getpid()); > > + if (ret < 0) > > + goto fail; > > + > > + ublk_ctrl_get_info(dev); > > + ublk_ctrl_dump(dev, true); > > + > > + /* wait until we are terminated */ > > + for (i = 0; i < dinfo->nr_hw_queues; i++) > > + pthread_join(dev->q[i].thread, &thread_ret); > > + fail: > > + ublk_dev_unprep(dev); > > + ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__); > > + > > + return ret; > > +} > > + > > +static int cmd_dev_add(int argc, char *argv[]) > > +{ > > + static const struct option longopts[] = { > > + { "type", 1, NULL, 't' }, > > + { "number", 1, NULL, 'n' }, > > + { "queues", 1, NULL, 'q' }, > > + { "depth", 1, NULL, 'd' }, > > + { "debug_mask", 1, NULL, 0}, > > + { "quiet", 0, NULL, 0}, > > + { NULL } > > + }; > > + const struct ublk_tgt_ops *ops; > > + struct ublksrv_ctrl_dev_info *info; > > + struct ublk_dev *dev; > > + int ret, option_idx, opt; > > + const char *tgt_type = NULL; > > + int dev_id = -1; > > + unsigned nr_queues = 2, depth = UBLK_QUEUE_DEPTH; > > + > > + while ((opt = getopt_long(argc, argv, "-:t:n:d:q:", > > + longopts, &option_idx)) != -1) { > > + switch (opt) { > > + case 'n': > > + dev_id = strtol(optarg, NULL, 10); > > + break; > > + case 't': > > + tgt_type = optarg; > > + break; > > + case 'q': > > + nr_queues = strtol(optarg, NULL, 10); > > + break; > > + case 'd': > > + depth = strtol(optarg, NULL, 10); > > + break; > > + case 0: > > + if (!strcmp(longopts[option_idx].name, "debug_mask")) > > + ublk_dbg_mask = strtol(optarg, NULL, 16); > > + if (!strcmp(longopts[option_idx].name, "quiet")) > > + ublk_dbg_mask = 0; > > + break; > > + } > > + } > > + > > + optind = 0; > > + > > + ops = ublk_find_tgt(tgt_type); > > + 
if (!ops) { > > + ublk_err("%s: no such tgt type, type %s\n", > > + __func__, tgt_type); > > + return -ENODEV; > > + } > > + > > + if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) { > > + ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n", > > + __func__, nr_queues, depth); > > + return -EINVAL; > > + } > > + > > + dev = ublk_ctrl_init(); > > + if (!dev) { > > + ublk_err("%s: can't alloc dev id %d, type %s\n", > > + __func__, dev_id, tgt_type); > > + return -ENOMEM; > > + } > > + > > + info = &dev->dev_info; > > + info->dev_id = dev_id; > > + info->nr_hw_queues = nr_queues; > > + info->queue_depth = depth; > > + dev->tgt.ops = ops; > > + dev->tgt.argc = argc; > > + dev->tgt.argv = argv; > > + > > + ret = ublk_ctrl_add_dev(dev); > > + if (ret < 0) { > > + ublk_err("%s: can't add dev id %d, type %s ret %d\n", > > + __func__, dev_id, tgt_type, ret); > > + goto fail; > > + } > > + > > + ret = ublk_start_daemon(dev); > > + if (ret < 0) { > > + ublk_err("%s: can't start daemon id %d, type %s\n", > > + __func__, dev_id, tgt_type); > > + goto fail_del; > > + } > > +fail_del: > > + ublk_ctrl_del_dev(dev); > > +fail: > > + ublk_ctrl_deinit(dev); > > + return ret; > > +} > > + > > +static int ublk_stop_io_daemon(const struct ublk_dev *dev) > > +{ > > + int daemon_pid = dev->dev_info.ublksrv_pid; > > + int cnt = 0, ret; > > + > > + if (daemon_pid == -1) > > + return 0; > > + > > + /* wait until daemon is exited, or timeout after 3 seconds */ > > + do { > > + ret = kill(daemon_pid, 0); > > + if (ret) > > + break; > > + usleep(500000); > > + cnt++; > > + } while (!ret && cnt < 6); > > + > > + ublk_dbg(UBLK_DBG_DEV, "%s: pid %d ret %d\n", __func__, daemon_pid, ret); > > + > > + return ret != 0 ? 
0 : -1; > > +} > > + > > +static int __cmd_dev_del(int number, bool log) > > +{ > > + struct ublk_dev *dev; > > + int ret; > > + > > + dev = ublk_ctrl_init(); > > + dev->dev_info.dev_id = number; > > + > > + ret = ublk_ctrl_get_info(dev); > > + if (ret < 0) { > > + goto fail; > > + } > > + > > + ret = ublk_ctrl_stop_dev(dev); > > + if (ret < 0) { > > + if (log) > > + ublk_err("stop dev %d failed\n", number); > > + goto fail; > > + } > > + > > + ret = ublk_stop_io_daemon(dev); > > + if (ret < 0) { > > + if (log) > > + ublk_err("stop daemon %d failed\n", number); > > + } > > + > > + ublk_ctrl_del_dev(dev); > > +fail: > > + ublk_ctrl_deinit(dev); > > + return ret; > > +} > > + > > +static int cmd_dev_del(int argc, char *argv[]) > > +{ > > + static const struct option longopts[] = { > > + { "number", 1, NULL, 'n' }, > > + { "all", 0, NULL, 'a' }, > > + { "debug_mask", 1, NULL, 0}, > > + { NULL } > > + }; > > + int number = -2; > > + int opt, i, option_idx; > > + > > + while ((opt = getopt_long(argc, argv, "n:a", > > + longopts, &option_idx)) != -1) { > > + switch (opt) { > > + case 'a': > > + number = -1; > > + break; > > + > > + case 'n': > > + number = strtol(optarg, NULL, 10); > > + break; > > + case 0: > > + if (!strcmp(longopts[option_idx].name, "debug_mask")) > > + ublk_dbg_mask = strtol(optarg, NULL, 16); > > + break; > > + } > > + } > > + > > + if (number >= 0) > > + return __cmd_dev_del(number, true); > > + else if (number != -1) { > > + ublk_err("%s: pass wrong devid or not delete via -a\n"); > > + return -EINVAL; > > + } > > + > > + for (i = 0; i < 255; i++) > > + __cmd_dev_del(i, false); > > + > > + return 0; > > +} > > + > > +static int __cmd_dev_list(int number, bool log) > > +{ > > + struct ublk_dev *dev = ublk_ctrl_init(); > > + int ret; > > + > > + dev->dev_info.dev_id = number; > > + > > + ret = ublk_ctrl_get_info(dev); > > + if (ret < 0) { > > + if (log) > > + ublk_err("%s: can't get dev info from %d: %d\n", > > + __func__, number, ret); > > + } else 
{ > > + ublk_ctrl_dump(dev, false); > > + } > > + > > + ublk_ctrl_deinit(dev); > > + > > + return ret; > > +} > > + > > + > > +static int cmd_dev_list(int argc, char *argv[]) > > +{ > > + static const struct option longopts[] = { > > + { "number", 1, NULL, 'n' }, > > + { "all", 0, NULL, 'a' }, > > + { NULL } > > + }; > > + int number = -1; > > + int opt, i; > > + > > + while ((opt = getopt_long(argc, argv, "n:a", > > + longopts, NULL)) != -1) { > > + switch (opt) { > > + case 'a': > > + break; > > + > > + case 'n': > > + number = strtol(optarg, NULL, 10); > > + break; > > + } > > + } > > + > > + if (number >= 0) > > + return __cmd_dev_list(number, true); > > + > > + for (i = 0; i < 255; i++) > > + __cmd_dev_list(i, false); > > + > > + return 0; > > +} > > + > > +static int cmd_dev_help(int argc, char *argv[]) > > +{ > > + printf("%s add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] \n", > > + argv[0]); > > + printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); > > + printf("\t -t loop -f backing_file \n"); > > + printf("\t -t null\n"); > > + printf("%s del [-n dev_id] -a \n", argv[0]); > > + printf("\t -a delete all devices -n delete specified device\n"); > > + printf("%s list [-n dev_id] -a \n", argv[0]); > > + printf("\t -a list all devices, -n list specified device, default -a \n"); > > + > > + return 0; > > +} > > + > > +static int ublk_null_tgt_init(struct ublk_dev *dev) > > +{ > > + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > > + unsigned long dev_size = 250UL << 30; > > + > > + dev->tgt.dev_size = dev_size; > > + dev->tgt.params = (struct ublk_params) { > > + .types = UBLK_PARAM_TYPE_BASIC, > > + .basic = { > > + .logical_bs_shift = 9, > > + .physical_bs_shift = 12, > > + .io_opt_shift = 12, > > + .io_min_shift = 9, > > + .max_sectors = info->max_io_buf_bytes >> 9, > > + .dev_sectors = dev_size >> 9, > > + }, > > + }; > > + > > + return 0; > > +} > > + > > +static int ublk_null_queue_io(struct 
ublk_queue *q, int tag) > > +{ > > + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); > > + > > + ublk_complete_io(q, tag, iod->nr_sectors << 9); > > + > > + return 0; > > +} > > + > > +static int loop_queue_tgt_io(struct ublk_queue *q, int tag) > > +{ > > + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); > > + struct io_uring_sqe *sqe = io_uring_get_sqe(&q->ring); > > + unsigned ublk_op = ublksrv_get_op(iod); > > + > > + if (!sqe) > > + return -ENOMEM; > > + > > + switch (ublk_op) { > > + case UBLK_IO_OP_FLUSH: > > + io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/, > > + iod->nr_sectors << 9, > > + iod->start_sector << 9, > > + IORING_FSYNC_DATASYNC); > > + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); > > + break; > > + case UBLK_IO_OP_WRITE_ZEROES: > > + case UBLK_IO_OP_DISCARD: > > + return -ENOTSUP; > > + case UBLK_IO_OP_READ: > > + io_uring_prep_read(sqe, 1 /*fds[1]*/, > > + (void *)iod->addr, > > + iod->nr_sectors << 9, > > + iod->start_sector << 9); > > + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); > > + break; > > + case UBLK_IO_OP_WRITE: > > + io_uring_prep_write(sqe, 1 /*fds[1]*/, > > + (void *)iod->addr, > > + iod->nr_sectors << 9, > > + iod->start_sector << 9); > > + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); > > + break; > > + default: > > + return -EINVAL; > > + } > > + > > + q->io_inflight++; > > + /* bit63 marks us as tgt io */ > > + sqe->user_data = build_user_data(tag, ublk_op, 0, 1); > > + > > + ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag, > > + iod->op_flags, iod->start_sector, iod->nr_sectors << 9); > > + return 1; > > +} > > + > > +static int ublk_loop_queue_io(struct ublk_queue *q, int tag) > > +{ > > + int queued = loop_queue_tgt_io(q, tag); > > + > > + if (queued < 0) > > + ublk_complete_io(q, tag, queued); > > + > > + return 0; > > +} > > + > > +static void ublk_loop_io_done(struct ublk_queue *q, int tag, > > + const struct io_uring_cqe *cqe) > > +{ > > + int cqe_tag = 
user_data_to_tag(cqe->user_data); > > + > > + ublk_assert(tag == cqe_tag); > > + ublk_complete_io(q, tag, cqe->res); > > + q->io_inflight--; > > +} > > + > > +static void ublk_loop_tgt_deinit(struct ublk_dev *dev) > > +{ > > + fsync(dev->fds[1]); > > + close(dev->fds[1]); > > +} > > + > > +static int ublk_loop_tgt_init(struct ublk_dev *dev) > > +{ > > + static const struct option lo_longopts[] = { > > + { "file", 1, NULL, 'f' }, > > + { NULL } > > + }; > > + unsigned long long bytes; > > + char **argv = dev->tgt.argv; > > + int argc = dev->tgt.argc; > > + char *file = NULL; > > + struct stat st; > > + int fd, opt; > > + struct ublk_params p = { > > + .types = UBLK_PARAM_TYPE_BASIC, > > + .basic = { > > + .logical_bs_shift = 9, > > + .physical_bs_shift = 12, > > + .io_opt_shift = 12, > > + .io_min_shift = 9, > > + .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, > > + }, > > + }; > > + > > + while ((opt = getopt_long(argc, argv, "-:f:", > > + lo_longopts, NULL)) != -1) { > > + switch (opt) { > > + case 'f': > > + file = strdup(optarg); > > + break; > > + } > > + } > > + > > + ublk_dbg(UBLK_DBG_DEV, "%s: file %s\n", __func__, file); > > + > > + if (!file) > > + return -EINVAL; > > + > > + fd = open(file, O_RDWR); > > + if (fd < 0) { > > + ublk_err( "%s: backing file %s can't be opened\n", > > + __func__, file); > > + return -EBADF; > > + } > > + > > + if (fstat(fd, &st) < 0) { > > + close(fd); > > + return -EBADF; > > + } > > + > > + if (S_ISBLK(st.st_mode)) { > > + unsigned int bs, pbs; > > + > > + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) > > + return -EBADF; > > + if (ioctl(fd, BLKSSZGET, &bs) != 0) > > + return -1; > > + if (ioctl(fd, BLKPBSZGET, &pbs) != 0) > > + return -1; > > + p.basic.logical_bs_shift = ilog2(bs); > > + p.basic.physical_bs_shift = ilog2(pbs); > > + } else if (S_ISREG(st.st_mode)) { > > + bytes = st.st_size; > > + } else { > > + bytes = 0; > > + } > > + > > + if (fcntl(fd, F_SETFL, O_DIRECT)) { > > + p.basic.logical_bs_shift = 9; > > + 
p.basic.physical_bs_shift = 12; > > + ublk_log("%s: ublk-loop fallback to buffered IO\n", __func__); > > + } > > + > > + dev->tgt.dev_size = bytes; > > + p.basic.dev_sectors = bytes >> 9; > > + dev->fds[1] = fd; > > + dev->nr_fds += 1; > > + dev->tgt.params = p; > > + > > + return 0; > > +} > > + > > +const struct ublk_tgt_ops tgt_ops_list[] = { > > + { > > + .name = "null", > > + .init_tgt = ublk_null_tgt_init, > > + .queue_io = ublk_null_queue_io, > > + }, > > + > > + { > > + .name = "loop", > > + .init_tgt = ublk_loop_tgt_init, > > + .deinit_tgt = ublk_loop_tgt_deinit, > > + .queue_io = ublk_loop_queue_io, > > + .tgt_io_done = ublk_loop_io_done, > > + }, > > +}; > > + > > +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) > > +{ > > + const struct ublk_tgt_ops *ops; > > + int i; > > + > > + if (name == NULL) > > + return NULL; > > + > > + for (i = 0; sizeof(tgt_ops_list) / sizeof(*ops); i++) > > + if (strcmp(tgt_ops_list[i].name, name) == 0) > > + return &tgt_ops_list[i]; > > + return NULL; > > +} > > + > > +int main(int argc, char *argv[]) > > +{ > > + const char *cmd = argv[1]; > > + int ret = -EINVAL; > > + > > + if (argc == 1) > > + goto out; > > + > > + if (!strcmp(cmd, "add")) > > + ret = cmd_dev_add(argc, argv); > > + else if (!strcmp(cmd, "del")) > > + ret = cmd_dev_del(argc, argv); > > + else if (!strcmp(cmd, "list")) > > + ret = cmd_dev_list(argc, argv); > > + else if (!strcmp(cmd, "help")) > > + ret = cmd_dev_help(argc, argv); > > +out: > > + if (ret) > > + cmd_dev_help(argc, argv); > > + > > + return ret; > > +} thanks, Ming
On Fri, Feb 24, 2023 at 04:40:45PM +0800, Ming Lei wrote: > Hi Chaitanya, > > Thanks for the review! > > On Tue, Feb 21, 2023 at 07:58:48PM +0000, Chaitanya Kulkarni wrote: > > On 2/19/2023 7:46 PM, Ming Lei wrote: > > > Prepare for adding ublk related test: > > > > > > 1) ublk delete is sync removal, this way is convenient to > > > blkg/queue/disk instance leak issue > > > > > > 2) mini ublk has two builtin target(null, loop), and loop IO is > > > handled by io_uring, so we can use ublk to cover part of io_uring > > > workloads > > > > > > 3) not like loop/nbd, ublk won't pre-allocate/add disk, and always > > > add/delete disk dynamically, this way may cover disk plug & unplug > > > tests > > > > > > 4) ublk specific test given people starts to use it, so better to > > > let blktest cover ublk related tests > > > > > > Add mini ublk source for test purpose only, which is easy to use: > > > > > > ./miniublk add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] > > > default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation) > > > -t loop -f backing_file > > > -t null > > > ./miniublk del [-n dev_id] -a > > > -a delete all devices, -n delete specified device > > > ./miniublk list [-n dev_id] -a > > > -a list all devices, -n list specified device, default -a > > > > > > miniublk depends on liburing 2.2, adds HAVE_LIBURING for checking if > > > liburing 2.2 exists; also add HAVE_UBLK_HEADER for checking ublk kernel > > > UAPI header exists. If either of two dependencies can't be met, simply > > > ignore miniublk target. > > > > > > Also v6.0 is the 1st linux kernel release with ublk. 
> > > > > > Signed-off-by: Ming Lei <ming.lei@redhat.com> > > > --- > > > src/.gitignore | 1 + > > > src/Makefile | 18 + > > > src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > 3 files changed, 1395 insertions(+) > > > create mode 100644 src/miniublk.c > > > > > > diff --git a/src/.gitignore b/src/.gitignore > > > index 355bed3..df7aff5 100644 > > > --- a/src/.gitignore > > > +++ b/src/.gitignore > > > @@ -8,3 +8,4 @@ > > > /sg/dxfer-from-dev > > > /sg/syzkaller1 > > > /zbdioctl > > > +/miniublk > > > diff --git a/src/Makefile b/src/Makefile > > > index 3b587f6..81c6541 100644 > > > --- a/src/Makefile > > > +++ b/src/Makefile > > > @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > > > $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ > > > else echo "$(3)"; fi) > > > > > > +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ > > > + $(CC) -E - 2>&1 /dev/null | grep $(2) > /dev/null 2>&1; \ > > > + then echo 1;else echo 0; fi) > > > + > > > C_TARGETS := \ > > > loblksize \ > > > loop_change_fd \ > > > @@ -13,16 +17,27 @@ C_TARGETS := \ > > > sg/syzkaller1 \ > > > zbdioctl > > > > > > +C_MINIUBLK := miniublk > > > + > > > +HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD) > > > +HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1) > > > + > > > CXX_TARGETS := \ > > > discontiguous-io > > > > > > +ifeq ($(HAVE_LIBURING)$(HAVE_UBLK_HEADER), 11) > > > +TARGETS := $(C_TARGETS) $(CXX_TARGETS) $(C_MINIUBLK) > > > +else > > > +$(info Skip $(C_MINIUBLK) build due to missing kernel header(v6.0+) or liburing(2.2+)) > > > TARGETS := $(C_TARGETS) $(CXX_TARGETS) > > > +endif > > > > > > CONFIG_DEFS := $(call HAVE_C_HEADER,linux/blkzoned.h,-DHAVE_LINUX_BLKZONED_H) > > > > > > override CFLAGS := -O2 -Wall -Wshadow $(CFLAGS) $(CONFIG_DEFS) > > > override CXXFLAGS := -O2 -std=c++11 -Wall -Wextra -Wshadow -Wno-sign-compare \ > > > -Werror $(CXXFLAGS) $(CONFIG_DEFS) > > > +MINIUBLK_FLAGS := -D_GNU_SOURCE 
-lpthread -luring > > > > > > all: $(TARGETS) > > > > > > @@ -39,4 +54,7 @@ $(C_TARGETS): %: %.c > > > $(CXX_TARGETS): %: %.cpp > > > $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ > > > > > > +$(C_MINIUBLK): %: miniublk.c > > > + $(CC) $(CFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c > > > + > > > .PHONY: all clean install > > > diff --git a/src/miniublk.c b/src/miniublk.c > > > new file mode 100644 > > > index 0000000..e84ba41 > > > --- /dev/null > > > +++ b/src/miniublk.c > > > @@ -0,0 +1,1376 @@ > > > +// SPDX-License-Identifier: GPL-3.0+ > > > +// Copyright (C) 2023 Ming Lei > > > + > > > +/* > > > + * io_uring based mini ublk implementation with null/loop target, > > > + * for test purpose only. > > > + * > > > + * So please keep it clean & simple & reliable. > > > + */ > > > + > > > +#include <unistd.h> > > > +#include <stdlib.h> > > > +#include <assert.h> > > > +#include <stdio.h> > > > +#include <stdarg.h> > > > +#include <string.h> > > > +#include <pthread.h> > > > +#include <getopt.h> > > > +#include <limits.h> > > > +#include <sys/syscall.h> > > > +#include <sys/mman.h> > > > +#include <sys/ioctl.h> > > > +#include <liburing.h> > > > +#include <linux/ublk_cmd.h> > > > + > > > +#define CTRL_DEV "/dev/ublk-control" > > > +#define UBLKC_DEV "/dev/ublkc" > > > +#define UBLK_CTRL_RING_DEPTH 32 > > > + > > > +/* queue idle timeout */ > > > +#define UBLKSRV_IO_IDLE_SECS 20 > > > + > > > +#define UBLK_IO_MAX_BYTES 65536 > > > +#define UBLK_MAX_QUEUES 4 > > > +#define UBLK_QUEUE_DEPTH 128 > > > + > > > +#define UBLK_DBG_DEV (1U << 0) > > > +#define UBLK_DBG_QUEUE (1U << 1) > > > +#define UBLK_DBG_IO_CMD (1U << 2) > > > +#define UBLK_DBG_IO (1U << 3) > > > +#define UBLK_DBG_CTRL_CMD (1U << 4) > > > +#define UBLK_LOG (1U << 5) > > > + > > > +struct ublk_dev; > > > +struct ublk_queue; > > > + > > > +struct ublk_ctrl_cmd_data { > > > + unsigned short cmd_op; > > > > perhaps use enum type to avoid any type mismatach errors in future.. > > Sounds good. 
Oops, the command op here is actually defined in the UAPI header, so it can't be changed to an enum any more, but it is still better to align its type with the uring_cmd type (u32). Thanks, Ming
On Feb 24, 2023 / 16:28, Ming Lei wrote: > On Fri, Feb 24, 2023 at 03:52:28PM +0800, Ziyang Zhang wrote: > > On 2023/2/20 11:46, Ming Lei wrote: > > > > [...] > > > > > > > > Signed-off-by: Ming Lei <ming.lei@redhat.com> > > > --- > > > src/.gitignore | 1 + > > > src/Makefile | 18 + > > > src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > 3 files changed, 1395 insertions(+) > > > create mode 100644 src/miniublk.c > > > > > > diff --git a/src/.gitignore b/src/.gitignore > > > index 355bed3..df7aff5 100644 > > > --- a/src/.gitignore > > > +++ b/src/.gitignore > > > @@ -8,3 +8,4 @@ > > > /sg/dxfer-from-dev > > > /sg/syzkaller1 > > > /zbdioctl > > > +/miniublk > > > diff --git a/src/Makefile b/src/Makefile > > > index 3b587f6..81c6541 100644 > > > --- a/src/Makefile > > > +++ b/src/Makefile > > > @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > > > $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ > > > else echo "$(3)"; fi) > > > > > > +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ > > Hi Ming, > > > > It should be "\#include", not "#include". You miss a "\". > > "\#include" won't work for checking the macro of IORING_OP_URING_CMD. > > [root@ktest-36 linux]# echo "\#include <liburing.h>" | gcc -E - > # 0 "<stdin>" > # 0 "<built-in>" > # 0 "<command-line>" > # 1 "/usr/include/stdc-predef.h" 1 3 4 > # 0 "<command-line>" 2 > # 1 "<stdin>" > \#include <liburing.h> I also tried and observed the same symptom. HAVE_C_MACRO works well without the backslash. Adding the backslash, it fails. I think Ziyang made the comment because HAVE_C_HEADER has the backslash. (Thanks for catching the difference between HAVE_C_HEADER and HAVE_C_MACRO.) I think another fix is needed to remove that backslash from HAVE_C_HEADER. I've created a one-liner fix patch quickly [1]. It looks ok for blktests CI. I will revisit it after Ming's patches get settled.
[1] https://github.com/osandov/blktests/pull/112/commits/dd5852e69abc3247d7b0ec4faf916a395378362d
On 2023/2/24 19:41, Shinichiro Kawasaki wrote: > On Feb 24, 2023 / 16:28, Ming Lei wrote: >> On Fri, Feb 24, 2023 at 03:52:28PM +0800, Ziyang Zhang wrote: >>> On 2023/2/20 11:46, Ming Lei wrote: >>> >>> [...] >>> >>>> >>>> Signed-off-by: Ming Lei <ming.lei@redhat.com> >>>> --- >>>> src/.gitignore | 1 + >>>> src/Makefile | 18 + >>>> src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ >>>> 3 files changed, 1395 insertions(+) >>>> create mode 100644 src/miniublk.c >>>> >>>> diff --git a/src/.gitignore b/src/.gitignore >>>> index 355bed3..df7aff5 100644 >>>> --- a/src/.gitignore >>>> +++ b/src/.gitignore >>>> @@ -8,3 +8,4 @@ >>>> /sg/dxfer-from-dev >>>> /sg/syzkaller1 >>>> /zbdioctl >>>> +/miniublk >>>> diff --git a/src/Makefile b/src/Makefile >>>> index 3b587f6..81c6541 100644 >>>> --- a/src/Makefile >>>> +++ b/src/Makefile >>>> @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ >>>> $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ >>>> else echo "$(3)"; fi) >>>> >>>> +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ >>> Hi Ming, >>> >>> It should be "\#include", not "#include". You miss a "\". >> >> "\#include" won't work for checking the macro of IORING_OP_URING_CMD. >> >> [root@ktest-36 linux]# echo "\#include <liburing.h>" | gcc -E - >> # 0 "<stdin>" >> # 0 "<built-in>" >> # 0 "<command-line>" >> # 1 "/usr/include/stdc-predef.h" 1 3 4 >> # 0 "<command-line>" 2 >> # 1 "<stdin>" >> \#include <liburing.h> > > I also tried and observed the same symptom. HAVE_C_MACRO works well without the > backslash. Adding the backslash, it fails. > > I think Ziyang made the comment because HAVE_C_HEADER has the backslash. (Thanks > for catching the difference between HAVA_C_HEADER and HAVE_C_MACRO.) I think > another fix is needed to remove that backslash from HAVE_C_HEADER. I've create > a one liner fix patch quickly [1]. It looks ok for blktests CI. I will revisit > it after Ming's patches get settled. 
> > [1] https://github.com/osandov/blktests/pull/112/commits/dd5852e69abc3247d7b0ec4faf916a395378362d > Hello, Sorry, I am not familiar with shell script. But **without** the backslash, I get this error: $ make make -C src all make[1]: Entering directory '/home/alinux/workspace/blktests/src' Makefile:5: *** unterminated call to function 'shell': missing ')'. Stop. make[1]: Leaving directory '/home/alinux/workspace/blktests/src' make: *** [Makefile:5: all] Error 2 Regards, Zhang
On Feb 27, 2023 / 10:57, Ziyang Zhang wrote: > On 2023/2/24 19:41, Shinichiro Kawasaki wrote: > > On Feb 24, 2023 / 16:28, Ming Lei wrote: > >> On Fri, Feb 24, 2023 at 03:52:28PM +0800, Ziyang Zhang wrote: > >>> On 2023/2/20 11:46, Ming Lei wrote: > >>> > >>> [...] > >>> > >>>> > >>>> Signed-off-by: Ming Lei <ming.lei@redhat.com> > >>>> --- > >>>> src/.gitignore | 1 + > >>>> src/Makefile | 18 + > >>>> src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ > >>>> 3 files changed, 1395 insertions(+) > >>>> create mode 100644 src/miniublk.c > >>>> > >>>> diff --git a/src/.gitignore b/src/.gitignore > >>>> index 355bed3..df7aff5 100644 > >>>> --- a/src/.gitignore > >>>> +++ b/src/.gitignore > >>>> @@ -8,3 +8,4 @@ > >>>> /sg/dxfer-from-dev > >>>> /sg/syzkaller1 > >>>> /zbdioctl > >>>> +/miniublk > >>>> diff --git a/src/Makefile b/src/Makefile > >>>> index 3b587f6..81c6541 100644 > >>>> --- a/src/Makefile > >>>> +++ b/src/Makefile > >>>> @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > >>>> $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ > >>>> else echo "$(3)"; fi) > >>>> > >>>> +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ > >>> Hi Ming, > >>> > >>> It should be "\#include", not "#include". You miss a "\". > >> > >> "\#include" won't work for checking the macro of IORING_OP_URING_CMD. > >> > >> [root@ktest-36 linux]# echo "\#include <liburing.h>" | gcc -E - > >> # 0 "<stdin>" > >> # 0 "<built-in>" > >> # 0 "<command-line>" > >> # 1 "/usr/include/stdc-predef.h" 1 3 4 > >> # 0 "<command-line>" 2 > >> # 1 "<stdin>" > >> \#include <liburing.h> > > > > I also tried and observed the same symptom. HAVE_C_MACRO works well without the > > backslash. Adding the backslash, it fails. > > > > I think Ziyang made the comment because HAVE_C_HEADER has the backslash. (Thanks > > for catching the difference between HAVA_C_HEADER and HAVE_C_MACRO.) I think > > another fix is needed to remove that backslash from HAVE_C_HEADER. 
I've create > > a one liner fix patch quickly [1]. It looks ok for blktests CI. I will revisit > > it after Ming's patches get settled. > > > > [1] https://github.com/osandov/blktests/pull/112/commits/dd5852e69abc3247d7b0ec4faf916a395378362d > > > > Hello, > > Sorry, I am not familiar with shell script. But **without** the backslash, > I get this error: > > $ make > make -C src all > make[1]: Entering directory '/home/alinux/workspace/blktests/src' > Makefile:5: *** unterminated call to function 'shell': missing ')'. Stop. > make[1]: Leaving directory '/home/alinux/workspace/blktests/src' > make: *** [Makefile:5: all] Error 2 I see... I googled and learned that make version 4.3 introduced this '# inside macro' handling difference [2]. I guess your make version is older than 4.3, isn't it? [2] https://lwn.net/Articles/810071/ Per the LWN article [2], the fix should be as follows. It works as expected on my system with make version 4.3. Could you try it on your system? diff --git a/src/Makefile b/src/Makefile index 81c6541..322eb1c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,8 +1,10 @@ -HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ +H := \# + +HAVE_C_HEADER = $(shell if echo "$(H)include <$(1)>" | \ $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ else echo "$(3)"; fi) -HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ +HAVE_C_MACRO = $(shell if echo -e "$(H)include <$(1)>" | \ $(CC) -E - 2>&1 /dev/null | grep $(2) > /dev/null 2>&1; \ then echo 1;else echo 0; fi)
On 2023/2/27 13:41, Shinichiro Kawasaki wrote: > On Feb 27, 2023 / 10:57, Ziyang Zhang wrote: >> On 2023/2/24 19:41, Shinichiro Kawasaki wrote: >>> On Feb 24, 2023 / 16:28, Ming Lei wrote: >>>> On Fri, Feb 24, 2023 at 03:52:28PM +0800, Ziyang Zhang wrote: >>>>> On 2023/2/20 11:46, Ming Lei wrote: >>>>> >>>>> [...] >>>>> >>>>>> >>>>>> Signed-off-by: Ming Lei <ming.lei@redhat.com> >>>>>> --- >>>>>> src/.gitignore | 1 + >>>>>> src/Makefile | 18 + >>>>>> src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ >>>>>> 3 files changed, 1395 insertions(+) >>>>>> create mode 100644 src/miniublk.c >>>>>> >>>>>> diff --git a/src/.gitignore b/src/.gitignore >>>>>> index 355bed3..df7aff5 100644 >>>>>> --- a/src/.gitignore >>>>>> +++ b/src/.gitignore >>>>>> @@ -8,3 +8,4 @@ >>>>>> /sg/dxfer-from-dev >>>>>> /sg/syzkaller1 >>>>>> /zbdioctl >>>>>> +/miniublk >>>>>> diff --git a/src/Makefile b/src/Makefile >>>>>> index 3b587f6..81c6541 100644 >>>>>> --- a/src/Makefile >>>>>> +++ b/src/Makefile >>>>>> @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ >>>>>> $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ >>>>>> else echo "$(3)"; fi) >>>>>> >>>>>> +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ >>>>> Hi Ming, >>>>> >>>>> It should be "\#include", not "#include". You miss a "\". >>>> >>>> "\#include" won't work for checking the macro of IORING_OP_URING_CMD. >>>> >>>> [root@ktest-36 linux]# echo "\#include <liburing.h>" | gcc -E - >>>> # 0 "<stdin>" >>>> # 0 "<built-in>" >>>> # 0 "<command-line>" >>>> # 1 "/usr/include/stdc-predef.h" 1 3 4 >>>> # 0 "<command-line>" 2 >>>> # 1 "<stdin>" >>>> \#include <liburing.h> >>> >>> I also tried and observed the same symptom. HAVE_C_MACRO works well without the >>> backslash. Adding the backslash, it fails. >>> >>> I think Ziyang made the comment because HAVE_C_HEADER has the backslash. (Thanks >>> for catching the difference between HAVA_C_HEADER and HAVE_C_MACRO.) 
I think >>> another fix is needed to remove that backslash from HAVE_C_HEADER. I've create >>> a one liner fix patch quickly [1]. It looks ok for blktests CI. I will revisit >>> it after Ming's patches get settled. >>> >>> [1] https://github.com/osandov/blktests/pull/112/commits/dd5852e69abc3247d7b0ec4faf916a395378362d >>> >> >> Hello, >> >> Sorry, I am not familiar with shell script. But **without** the backslash, >> I get this error: >> >> $ make >> make -C src all >> make[1]: Entering directory '/home/alinux/workspace/blktests/src' >> Makefile:5: *** unterminated call to function 'shell': missing ')'. Stop. >> make[1]: Leaving directory '/home/alinux/workspace/blktests/src' >> make: *** [Makefile:5: all] Error 2 > > I see... I googled and learned that make version 4.3 introduced this '# inside > macro' handling difference [2]. I guess your make has version older than 4.3, > isn't it? > > [2] https://lwn.net/Articles/810071/ > > Per the the LWN article [2], the fix should be as follows. It works as expected > on my system with make version 4.3. Could you try it on your system? > > diff --git a/src/Makefile b/src/Makefile > index 81c6541..322eb1c 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -1,8 +1,10 @@ > -HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > +H := \# > + > +HAVE_C_HEADER = $(shell if echo "$(H)include <$(1)>" | \ > $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ > else echo "$(3)"; fi) > > -HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ > +HAVE_C_MACRO = $(shell if echo -e "$(H)include <$(1)>" | \ > $(CC) -E - 2>&1 /dev/null | grep $(2) > /dev/null 2>&1; \ > then echo 1;else echo 0; fi) My make version is 4.2.1, and your fix works! Thanks, Shinichiro. Regards, Zhang
On Feb 27, 2023 / 14:10, Ziyang Zhang wrote:
[...]
> My make version is 4.2.1, and your fix works! Thanks, Shinichiro.
Thanks Ziyang. Based on this fix, I will prepare a follow-up patch for Ming's
series.
diff --git a/src/.gitignore b/src/.gitignore index 355bed3..df7aff5 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -8,3 +8,4 @@ /sg/dxfer-from-dev /sg/syzkaller1 /zbdioctl +/miniublk diff --git a/src/Makefile b/src/Makefile index 3b587f6..81c6541 100644 --- a/src/Makefile +++ b/src/Makefile @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ $(CC) -E - > /dev/null 2>&1; then echo "$(2)"; \ else echo "$(3)"; fi) +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" | \ + $(CC) -E - 2>&1 /dev/null | grep $(2) > /dev/null 2>&1; \ + then echo 1;else echo 0; fi) + C_TARGETS := \ loblksize \ loop_change_fd \ @@ -13,16 +17,27 @@ C_TARGETS := \ sg/syzkaller1 \ zbdioctl +C_MINIUBLK := miniublk + +HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD) +HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1) + CXX_TARGETS := \ discontiguous-io +ifeq ($(HAVE_LIBURING)$(HAVE_UBLK_HEADER), 11) +TARGETS := $(C_TARGETS) $(CXX_TARGETS) $(C_MINIUBLK) +else +$(info Skip $(C_MINIUBLK) build due to missing kernel header(v6.0+) or liburing(2.2+)) TARGETS := $(C_TARGETS) $(CXX_TARGETS) +endif CONFIG_DEFS := $(call HAVE_C_HEADER,linux/blkzoned.h,-DHAVE_LINUX_BLKZONED_H) override CFLAGS := -O2 -Wall -Wshadow $(CFLAGS) $(CONFIG_DEFS) override CXXFLAGS := -O2 -std=c++11 -Wall -Wextra -Wshadow -Wno-sign-compare \ -Werror $(CXXFLAGS) $(CONFIG_DEFS) +MINIUBLK_FLAGS := -D_GNU_SOURCE -lpthread -luring all: $(TARGETS) @@ -39,4 +54,7 @@ $(C_TARGETS): %: %.c $(CXX_TARGETS): %: %.cpp $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ +$(C_MINIUBLK): %: miniublk.c + $(CC) $(CFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c + .PHONY: all clean install diff --git a/src/miniublk.c b/src/miniublk.c new file mode 100644 index 0000000..e84ba41 --- /dev/null +++ b/src/miniublk.c @@ -0,0 +1,1376 @@ +// SPDX-License-Identifier: GPL-3.0+ +// Copyright (C) 2023 Ming Lei + +/* + * io_uring based mini ublk implementation with null/loop target, + * for test purpose only. 
+ * + * So please keep it clean & simple & reliable. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <pthread.h> +#include <getopt.h> +#include <limits.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <liburing.h> +#include <linux/ublk_cmd.h> + +#define CTRL_DEV "/dev/ublk-control" +#define UBLKC_DEV "/dev/ublkc" +#define UBLK_CTRL_RING_DEPTH 32 + +/* queue idle timeout */ +#define UBLKSRV_IO_IDLE_SECS 20 + +#define UBLK_IO_MAX_BYTES 65536 +#define UBLK_MAX_QUEUES 4 +#define UBLK_QUEUE_DEPTH 128 + +#define UBLK_DBG_DEV (1U << 0) +#define UBLK_DBG_QUEUE (1U << 1) +#define UBLK_DBG_IO_CMD (1U << 2) +#define UBLK_DBG_IO (1U << 3) +#define UBLK_DBG_CTRL_CMD (1U << 4) +#define UBLK_LOG (1U << 5) + +struct ublk_dev; +struct ublk_queue; + +struct ublk_ctrl_cmd_data { + unsigned short cmd_op; +#define CTRL_CMD_HAS_DATA 1 +#define CTRL_CMD_HAS_BUF 2 + unsigned short flags; + + __u64 data[2]; + __u64 addr; + __u32 len; +}; + +struct ublk_io { + char *buf_addr; + +#define UBLKSRV_NEED_FETCH_RQ (1UL << 0) +#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) +#define UBLKSRV_IO_FREE (1UL << 2) + unsigned int flags; + + unsigned int result; +}; + +struct ublk_tgt_ops { + const char *name; + int (*init_tgt)(struct ublk_dev *); + void (*deinit_tgt)(struct ublk_dev *); + + int (*queue_io)(struct ublk_queue *, int tag); + void (*tgt_io_done)(struct ublk_queue *, + int tag, const struct io_uring_cqe *); +}; + +struct ublk_tgt { + unsigned long dev_size; + const struct ublk_tgt_ops *ops; + int argc; + char **argv; + struct ublk_params params; +}; + +struct ublk_queue { + int q_id; + int q_depth; + unsigned int cmd_inflight; + unsigned int io_inflight; + struct ublk_dev *dev; + const struct ublk_tgt_ops *tgt_ops; + char *io_cmd_buf; + struct io_uring ring; + struct ublk_io ios[UBLK_QUEUE_DEPTH]; +#define UBLKSRV_QUEUE_STOPPING (1U << 0) +#define 
UBLKSRV_QUEUE_IDLE (1U << 1) + unsigned state; + int tid; + pthread_t thread; +}; + +struct ublk_dev { + struct ublk_tgt tgt; + struct ublksrv_ctrl_dev_info dev_info; + struct ublk_queue q[UBLK_MAX_QUEUES]; + + int fds[2]; /* fds[0] points to /dev/ublkcN */ + int nr_fds; + int ctrl_fd; + struct io_uring ring; +}; + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + unsigned long __mptr = (unsigned long)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); }) +#endif + +#define round_up(val, rnd) \ + (((val) + ((rnd) - 1)) & ~((rnd) - 1)) + +#define ublk_assert(x) do { \ + if (!(x)) { \ + ublk_err("%s %d: assert!\n", __func__, __LINE__); \ + assert(x); \ + } \ +} while (0) + +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name); + +static unsigned int ublk_dbg_mask = UBLK_LOG; + +static inline unsigned ilog2(unsigned x) +{ + return sizeof(unsigned) * 8 - 1 - __builtin_clz(x); +} + +static inline int is_target_io(__u64 user_data) +{ + return (user_data & (1ULL << 63)) != 0; +} + +static inline __u64 build_user_data(unsigned tag, unsigned op, + unsigned tgt_data, unsigned is_target_io) +{ + assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16)); + + return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63; +} + +static inline unsigned int user_data_to_tag(__u64 user_data) +{ + return user_data & 0xffff; +} + +static inline unsigned int user_data_to_op(__u64 user_data) +{ + return (user_data >> 16) & 0xff; +} + +static void ublk_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); +} + +static void ublk_log(const char *fmt, ...) +{ + if (ublk_dbg_mask & UBLK_LOG) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +static void ublk_dbg(int level, const char *fmt, ...) 
+{ + if (level & ublk_dbg_mask) { + va_list ap; + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) +{ + return (void *)&sqe->addr3; +} + +static inline void ublk_mark_io_done(struct ublk_io *io, int res) +{ + io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); + io->result = res; +} + +static inline const struct ublksrv_io_desc *ublk_get_iod( + const struct ublk_queue *q, int tag) +{ + return (struct ublksrv_io_desc *) + &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); +} + +static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, + __u32 cmd_op) +{ + __u32 *addr = (__u32 *)&sqe->off; + + addr[0] = cmd_op; + addr[1] = 0; +} + +static inline int ublk_setup_ring(struct io_uring *r, int depth, + int cq_depth, unsigned flags) +{ + struct io_uring_params p; + + memset(&p, 0, sizeof(p)); + p.flags = flags | IORING_SETUP_CQSIZE; + p.cq_entries = cq_depth; + + return io_uring_queue_init_params(depth, r, &p); +} + +static inline void ublk_ctrl_init_cmd(struct ublk_dev *dev, + struct io_uring_sqe *sqe, + struct ublk_ctrl_cmd_data *data) +{ + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe); + + sqe->fd = dev->ctrl_fd; + sqe->opcode = IORING_OP_URING_CMD; + sqe->ioprio = 0; + + if (data->flags & CTRL_CMD_HAS_BUF) { + cmd->addr = data->addr; + cmd->len = data->len; + } + + if (data->flags & CTRL_CMD_HAS_DATA) + cmd->data[0] = data->data[0]; + + cmd->dev_id = info->dev_id; + cmd->queue_id = -1; + + ublk_set_sqe_cmd_op(sqe, data->cmd_op); + + io_uring_sqe_set_data(sqe, cmd); +} + +static int __ublk_ctrl_cmd(struct ublk_dev *dev, + struct ublk_ctrl_cmd_data *data) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret = -EINVAL; + + sqe = io_uring_get_sqe(&dev->ring); + if (!sqe) { + ublk_err("%s: can't get sqe ret %d\n", __func__, ret); + return ret; + } + + 
ublk_ctrl_init_cmd(dev, sqe, data); + + ret = io_uring_submit(&dev->ring); + if (ret < 0) { + ublk_err("uring submit ret %d\n", ret); + return ret; + } + + ret = io_uring_wait_cqe(&dev->ring, &cqe); + if (ret < 0) { + ublk_err("wait cqe: %s\n", strerror(-ret)); + return ret; + } + io_uring_cqe_seen(&dev->ring, cqe); + + return cqe->res; +} + +int ublk_ctrl_stop_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_CMD_STOP_DEV, + }; + int ret; + + ret = __ublk_ctrl_cmd(dev, &data); + return ret; +} + +int ublk_ctrl_start_dev(struct ublk_dev *dev, + int daemon_pid) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_CMD_START_DEV, + .flags = CTRL_CMD_HAS_DATA, + }; + int ret; + + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; + + ret = __ublk_ctrl_cmd(dev, &data); + + return ret; +} + +int ublk_ctrl_add_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_CMD_ADD_DEV, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64)&dev->dev_info, + .len = sizeof(struct ublksrv_ctrl_dev_info), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +int ublk_ctrl_del_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_CMD_DEL_DEV, + .flags = 0, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +int ublk_ctrl_get_info(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_CMD_GET_DEV_INFO, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64)&dev->dev_info, + .len = sizeof(struct ublksrv_ctrl_dev_info), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +int ublk_ctrl_set_params(struct ublk_dev *dev, + struct ublk_params *params) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_CMD_SET_PARAMS, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64)params, + .len = sizeof(*params), + }; + params->len = sizeof(*params); + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_get_params(struct ublk_dev *dev, + struct ublk_params *params) +{ + struct ublk_ctrl_cmd_data 
data = { + .cmd_op = UBLK_CMD_GET_PARAMS, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64)params, + .len = sizeof(*params), + }; + int ret; + + params->len = sizeof(*params); + + ret = __ublk_ctrl_cmd(dev, &data); + return ret; +} + +static const char *ublk_dev_state_desc(struct ublk_dev *dev) +{ + switch (dev->dev_info.state) { + case UBLK_S_DEV_DEAD: + return "DEAD"; + case UBLK_S_DEV_LIVE: + return "LIVE"; + default: + return "UNKNOWN"; + }; +} + +static void ublk_ctrl_dump(struct ublk_dev *dev, bool show_queue) +{ + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + int ret; + struct ublk_params p; + + ret = ublk_ctrl_get_params(dev, &p); + if (ret < 0) { + ublk_err("failed to get params %m\n"); + return; + } + + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", + info->dev_id, + info->nr_hw_queues, info->queue_depth, + 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); + ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", + info->max_io_buf_bytes, + info->ublksrv_pid, info->flags, + ublk_dev_state_desc(dev)); + if (show_queue) { + int i; + + for (i = 0; i < dev->dev_info.nr_hw_queues; i++) + ublk_log("\tqueue 0 tid: %d\n", dev->q[i].tid); + } + fflush(stdout); +} + +static void ublk_ctrl_deinit(struct ublk_dev *dev) +{ + close(dev->ctrl_fd); + free(dev); +} + +static struct ublk_dev *ublk_ctrl_init() +{ + struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev)); + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + int ret; + + dev->ctrl_fd = open(CTRL_DEV, O_RDWR); + if (dev->ctrl_fd < 0) { + ublk_err("control dev %s can't be opened: %m\n", CTRL_DEV); + exit(dev->ctrl_fd); + } + info->max_io_buf_bytes = UBLK_IO_MAX_BYTES; + + ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH, + UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128); + if (ret < 0) { + ublk_err("queue_init: %s\n", strerror(-ret)); + free(dev); + return NULL; + } + dev->nr_fds = 1; + + return dev; +} + +static int 
ublk_queue_cmd_buf_sz(struct ublk_queue *q) +{ + int size = q->q_depth * sizeof(struct ublksrv_io_desc); + unsigned int page_sz = getpagesize(); + + return round_up(size, page_sz); +} + +static void ublk_queue_deinit(struct ublk_queue *q) +{ + int i; + int nr_ios = q->q_depth; + + io_uring_unregister_ring_fd(&q->ring); + + if (q->ring.ring_fd > 0) { + io_uring_unregister_files(&q->ring); + close(q->ring.ring_fd); + q->ring.ring_fd = -1; + } + + if (q->io_cmd_buf) + munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q)); + + for (i = 0; i < nr_ios; i++) + free(q->ios[i].buf_addr); +} + +static int ublk_queue_init(struct ublk_queue *q) +{ + struct ublk_dev *dev = q->dev; + int depth = dev->dev_info.queue_depth; + int i, ret = -1; + int cmd_buf_size, io_buf_size; + unsigned long off; + int ring_depth = depth, cq_depth = depth; + + q->tgt_ops = dev->tgt.ops; + q->state = 0; + q->q_depth = depth; + q->cmd_inflight = 0; + q->tid = gettid(); + + cmd_buf_size = ublk_queue_cmd_buf_sz(q); + off = UBLKSRV_CMD_BUF_OFFSET + + q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc)); + q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ, + MAP_SHARED | MAP_POPULATE, dev->fds[0], off); + if (q->io_cmd_buf == MAP_FAILED) { + ublk_err("ublk dev %d queue %d map io_cmd_buf failed\n", + q->dev->dev_info.dev_id, q->q_id); + goto fail; + } + + io_buf_size = dev->dev_info.max_io_buf_bytes; + for (i = 0; i < q->q_depth; i++) { + q->ios[i].buf_addr = NULL; + + if (posix_memalign((void **)&q->ios[i].buf_addr, + getpagesize(), io_buf_size)) { + ublk_err("ublk dev %d queue %d io %d posix_memalign failed\n", + dev->dev_info.dev_id, q->q_id, i); + goto fail; + } + q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE; + } + + ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, + IORING_SETUP_SQE128 | IORING_SETUP_COOP_TASKRUN); + if (ret < 0) { + ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", + q->dev->dev_info.dev_id, q->q_id, ret); + goto fail; + } + + 
io_uring_register_ring_fd(&q->ring); + + ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds); + if (ret) { + ublk_err("ublk dev %d queue %d register files failed %d\n", + q->dev->dev_info.dev_id, q->q_id, ret); + goto fail; + } + + return 0; + fail: + ublk_queue_deinit(q); + ublk_err("ublk dev %d queue %d failed\n", + dev->dev_info.dev_id, q->q_id); + return -ENOMEM; +} + +static int ublk_dev_prep(struct ublk_dev *dev) +{ + int dev_id = dev->dev_info.dev_id; + char buf[64]; + int ret; + + snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id); + dev->fds[0] = open(buf, O_RDWR); + if (dev->fds[0] < 0) { + ret = -EBADF; + ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]); + goto fail; + } + + if (dev->tgt.ops->init_tgt) + ret = dev->tgt.ops->init_tgt(dev); + else + ret = 0; + + return ret; +fail: + close(dev->fds[0]); + return ret; +} + +static void ublk_dev_unprep(struct ublk_dev *dev) +{ + if (dev->tgt.ops->deinit_tgt) + dev->tgt.ops->deinit_tgt(dev); + close(dev->fds[0]); +} + +static int ublk_queue_io_cmd(struct ublk_queue *q, + struct ublk_io *io, unsigned tag) +{ + struct ublksrv_io_cmd *cmd; + struct io_uring_sqe *sqe; + unsigned int cmd_op = 0; + __u64 user_data; + + /* only freed io can be issued */ + if (!(io->flags & UBLKSRV_IO_FREE)) + return 0; + + /* we issue because we need either fetching or committing */ + if (!(io->flags & + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + return 0; + + if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + cmd_op = UBLK_IO_COMMIT_AND_FETCH_REQ; + else if (io->flags & UBLKSRV_NEED_FETCH_RQ) + cmd_op = UBLK_IO_FETCH_REQ; + + sqe = io_uring_get_sqe(&q->ring); + if (!sqe) { + ublk_err("%s: run out of sqe %d, tag %d\n", + __func__, q->q_id, tag); + return -1; + } + + cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe); + + if (cmd_op == UBLK_IO_COMMIT_AND_FETCH_REQ) + cmd->result = io->result; + + /* These fields should be written once, never change */ + ublk_set_sqe_cmd_op(sqe, cmd_op); + sqe->fd = 0; /* 
dev->fds[0] */ + sqe->opcode = IORING_OP_URING_CMD; + sqe->flags = IOSQE_FIXED_FILE; + sqe->rw_flags = 0; + cmd->tag = tag; + cmd->addr = (__u64)io->buf_addr; + cmd->q_id = q->q_id; + + user_data = build_user_data(tag, cmd_op, 0, 0); + io_uring_sqe_set_data64(sqe, user_data); + + io->flags = 0; + + q->cmd_inflight += 1; + + ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n", + __func__, q->q_id, tag, cmd_op, + io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING)); + return 1; +} + +static int ublk_complete_io(struct ublk_queue *q, + unsigned tag, int res) +{ + struct ublk_io *io = &q->ios[tag]; + + ublk_mark_io_done(io, res); + + return ublk_queue_io_cmd(q, io, tag); +} + +static void ublk_submit_fetch_commands(struct ublk_queue *q) +{ + int i = 0; + + for (i = 0; i < q->q_depth; i++) + ublk_queue_io_cmd(q, &q->ios[i], i); +} + +static int ublk_queue_is_idle(struct ublk_queue *q) +{ + return !io_uring_sq_ready(&q->ring) && !q->io_inflight; +} + +static int ublk_queue_is_done(struct ublk_queue *q) +{ + return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q); +} + +static void ublk_queue_discard_io_pages(struct ublk_queue *q) +{ + const struct ublk_dev *dev = q->dev; + unsigned int io_buf_size = dev->dev_info.max_io_buf_bytes; + int i = 0; + + for (i = 0; i < q->q_depth; i++) + madvise(q->ios[i].buf_addr, io_buf_size, MADV_DONTNEED); +} + +static void ublk_queue_idle_enter(struct ublk_queue *q) +{ + if (q->state & UBLKSRV_QUEUE_IDLE) + return; + + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: enter idle %x\n", + q->dev->dev_info.dev_id, q->q_id, q->state); + ublk_queue_discard_io_pages(q); + q->state |= UBLKSRV_QUEUE_IDLE; +} + +static void ublk_queue_idle_exit(struct ublk_queue *q) +{ + if (q->state & UBLKSRV_QUEUE_IDLE) { + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: exit idle %x\n", + q->dev->dev_info.dev_id, q->q_id, q->state); + q->state &= ~UBLKSRV_QUEUE_IDLE; + } +} + +static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, + struct 
io_uring_cqe *cqe) +{ + unsigned tag = user_data_to_tag(cqe->user_data); + + if (cqe->res < 0 && cqe->res != -EAGAIN) + ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", + __func__, cqe->res, q->q_id, + user_data_to_tag(cqe->user_data), + user_data_to_op(cqe->user_data)); + + if (q->tgt_ops->tgt_io_done) + q->tgt_ops->tgt_io_done(q, tag, cqe); +} + +static void ublk_handle_cqe(struct io_uring *r, + struct io_uring_cqe *cqe, void *data) +{ + struct ublk_queue *q = container_of(r, struct ublk_queue, ring); + unsigned tag = user_data_to_tag(cqe->user_data); + unsigned cmd_op = user_data_to_op(cqe->user_data); + int fetch = (cqe->res != UBLK_IO_RES_ABORT) && + !(q->state & UBLKSRV_QUEUE_STOPPING); + struct ublk_io *io; + + ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n", + __func__, cqe->res, q->q_id, tag, cmd_op, + is_target_io(cqe->user_data), + (q->state & UBLKSRV_QUEUE_STOPPING)); + + /* Don't retrieve io in case of target io */ + if (is_target_io(cqe->user_data)) { + ublksrv_handle_tgt_cqe(q, cqe); + return; + } + + io = &q->ios[tag]; + q->cmd_inflight--; + + if (!fetch) { + q->state |= UBLKSRV_QUEUE_STOPPING; + io->flags &= ~UBLKSRV_NEED_FETCH_RQ; + } + + if (cqe->res == UBLK_IO_RES_OK) { + ublk_assert(tag < q->q_depth); + q->tgt_ops->queue_io(q, tag); + } else { + /* + * COMMIT_REQ will be completed immediately since no fetching + * piggyback is required. 
+ * + * Marking IO_FREE only, then this io won't be issued since + * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*) + * + * */ + io->flags = UBLKSRV_IO_FREE; + } +} + +static int ublk_reap_events_uring(struct io_uring *r) +{ + struct io_uring_cqe *cqe; + unsigned head; + int count = 0; + + io_uring_for_each_cqe(r, head, cqe) { + ublk_handle_cqe(r, cqe, NULL); + count += 1; + } + io_uring_cq_advance(r, count); + + return count; +} + +static int ublk_process_io(struct ublk_queue *q) +{ + int ret, reapped; + struct __kernel_timespec ts = { + .tv_sec = UBLKSRV_IO_IDLE_SECS, + .tv_nsec = 0 + }; + struct __kernel_timespec *tsp = (q->state & UBLKSRV_QUEUE_IDLE) ? + NULL : &ts; + struct io_uring_cqe *cqe; + + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n", + q->dev->dev_info.dev_id, + q->q_id, io_uring_sq_ready(&q->ring), + q->cmd_inflight, + (q->state & UBLKSRV_QUEUE_STOPPING)); + + if (ublk_queue_is_done(q)) + return -ENODEV; + + ret = io_uring_submit_and_wait_timeout(&q->ring, &cqe, 1, tsp, NULL); + reapped = ublk_reap_events_uring(&q->ring); + + ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n", + ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING), + (q->state & UBLKSRV_QUEUE_IDLE)); + + if (!(q->state & UBLKSRV_QUEUE_STOPPING)) { + if (ret == -ETIME && reapped == 0 && ublk_queue_is_idle(q)) + ublk_queue_idle_enter(q); + else + ublk_queue_idle_exit(q); + } + return reapped; +} + +static void *ublk_io_handler_fn(void *data) +{ + struct ublk_queue *q = data; + int dev_id = q->dev->dev_info.dev_id; + int ret; + + ret = ublk_queue_init(q); + if (ret) { + ublk_err("ublk dev %d queue %d init queue failed\n", + dev_id, q->q_id); + return NULL; + } + + /* submit all io commands to ublk driver */ + ublk_submit_fetch_commands(q); + + ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", + gettid(), + dev_id, q->q_id); + do { + if (ublk_process_io(q) < 0) + break; + } while (1); + + 
ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id); + ublk_queue_deinit(q); + return NULL; +} + +static void ublk_set_parameters(struct ublk_dev *dev) +{ + int ret; + + ret = ublk_ctrl_set_params(dev, &dev->tgt.params); + if (ret) + ublk_err("dev %d set basic parameter failed %d\n", + dev->dev_info.dev_id, ret); +} + +static int ublk_start_daemon(struct ublk_dev *dev) +{ + int ret, i; + void *thread_ret; + const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; + + daemon(1, 1); + + ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); + + ret = ublk_dev_prep(dev); + if (!dev) + return -ENOMEM; + + for (i = 0; i < dinfo->nr_hw_queues; i++) { + dev->q[i].dev = dev; + dev->q[i].q_id = i; + pthread_create(&dev->q[i].thread, NULL, + ublk_io_handler_fn, + &dev->q[i]); + } + + ublk_set_parameters(dev); + + /* everything is fine now, start us */ + ret = ublk_ctrl_start_dev(dev, getpid()); + if (ret < 0) + goto fail; + + ublk_ctrl_get_info(dev); + ublk_ctrl_dump(dev, true); + + /* wait until we are terminated */ + for (i = 0; i < dinfo->nr_hw_queues; i++) + pthread_join(dev->q[i].thread, &thread_ret); + fail: + ublk_dev_unprep(dev); + ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__); + + return ret; +} + +static int cmd_dev_add(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "type", 1, NULL, 't' }, + { "number", 1, NULL, 'n' }, + { "queues", 1, NULL, 'q' }, + { "depth", 1, NULL, 'd' }, + { "debug_mask", 1, NULL, 0}, + { "quiet", 0, NULL, 0}, + { NULL } + }; + const struct ublk_tgt_ops *ops; + struct ublksrv_ctrl_dev_info *info; + struct ublk_dev *dev; + int ret, option_idx, opt; + const char *tgt_type = NULL; + int dev_id = -1; + unsigned nr_queues = 2, depth = UBLK_QUEUE_DEPTH; + + while ((opt = getopt_long(argc, argv, "-:t:n:d:q:", + longopts, &option_idx)) != -1) { + switch (opt) { + case 'n': + dev_id = strtol(optarg, NULL, 10); + break; + case 't': + tgt_type = optarg; + break; + case 'q': + nr_queues = strtol(optarg, 
NULL, 10); + break; + case 'd': + depth = strtol(optarg, NULL, 10); + break; + case 0: + if (!strcmp(longopts[option_idx].name, "debug_mask")) + ublk_dbg_mask = strtol(optarg, NULL, 16); + if (!strcmp(longopts[option_idx].name, "quiet")) + ublk_dbg_mask = 0; + break; + } + } + + optind = 0; + + ops = ublk_find_tgt(tgt_type); + if (!ops) { + ublk_err("%s: no such tgt type, type %s\n", + __func__, tgt_type); + return -ENODEV; + } + + if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) { + ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n", + __func__, nr_queues, depth); + return -EINVAL; + } + + dev = ublk_ctrl_init(); + if (!dev) { + ublk_err("%s: can't alloc dev id %d, type %s\n", + __func__, dev_id, tgt_type); + return -ENOMEM; + } + + info = &dev->dev_info; + info->dev_id = dev_id; + info->nr_hw_queues = nr_queues; + info->queue_depth = depth; + dev->tgt.ops = ops; + dev->tgt.argc = argc; + dev->tgt.argv = argv; + + ret = ublk_ctrl_add_dev(dev); + if (ret < 0) { + ublk_err("%s: can't add dev id %d, type %s ret %d\n", + __func__, dev_id, tgt_type, ret); + goto fail; + } + + ret = ublk_start_daemon(dev); + if (ret < 0) { + ublk_err("%s: can't start daemon id %d, type %s\n", + __func__, dev_id, tgt_type); + goto fail_del; + } +fail_del: + ublk_ctrl_del_dev(dev); +fail: + ublk_ctrl_deinit(dev); + return ret; +} + +static int ublk_stop_io_daemon(const struct ublk_dev *dev) +{ + int daemon_pid = dev->dev_info.ublksrv_pid; + int cnt = 0, ret; + + if (daemon_pid == -1) + return 0; + + /* wait until daemon is exited, or timeout after 3 seconds */ + do { + ret = kill(daemon_pid, 0); + if (ret) + break; + usleep(500000); + cnt++; + } while (!ret && cnt < 6); + + ublk_dbg(UBLK_DBG_DEV, "%s: pid %d ret %d\n", __func__, daemon_pid, ret); + + return ret != 0 ? 
0 : -1; +} + +static int __cmd_dev_del(int number, bool log) +{ + struct ublk_dev *dev; + int ret; + + dev = ublk_ctrl_init(); + dev->dev_info.dev_id = number; + + ret = ublk_ctrl_get_info(dev); + if (ret < 0) { + goto fail; + } + + ret = ublk_ctrl_stop_dev(dev); + if (ret < 0) { + if (log) + ublk_err("stop dev %d failed\n", number); + goto fail; + } + + ret = ublk_stop_io_daemon(dev); + if (ret < 0) { + if (log) + ublk_err("stop daemon %d failed\n", number); + } + + ublk_ctrl_del_dev(dev); +fail: + ublk_ctrl_deinit(dev); + return ret; +} + +static int cmd_dev_del(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "number", 1, NULL, 'n' }, + { "all", 0, NULL, 'a' }, + { "debug_mask", 1, NULL, 0}, + { NULL } + }; + int number = -2; + int opt, i, option_idx; + + while ((opt = getopt_long(argc, argv, "n:a", + longopts, &option_idx)) != -1) { + switch (opt) { + case 'a': + number = -1; + break; + + case 'n': + number = strtol(optarg, NULL, 10); + break; + case 0: + if (!strcmp(longopts[option_idx].name, "debug_mask")) + ublk_dbg_mask = strtol(optarg, NULL, 16); + break; + } + } + + if (number >= 0) + return __cmd_dev_del(number, true); + else if (number != -1) { + ublk_err("%s: pass wrong devid or not delete via -a\n"); + return -EINVAL; + } + + for (i = 0; i < 255; i++) + __cmd_dev_del(i, false); + + return 0; +} + +static int __cmd_dev_list(int number, bool log) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + int ret; + + dev->dev_info.dev_id = number; + + ret = ublk_ctrl_get_info(dev); + if (ret < 0) { + if (log) + ublk_err("%s: can't get dev info from %d: %d\n", + __func__, number, ret); + } else { + ublk_ctrl_dump(dev, false); + } + + ublk_ctrl_deinit(dev); + + return ret; +} + + +static int cmd_dev_list(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "number", 1, NULL, 'n' }, + { "all", 0, NULL, 'a' }, + { NULL } + }; + int number = -1; + int opt, i; + + while ((opt = getopt_long(argc, argv, "n:a", + longopts, 
NULL)) != -1) { + switch (opt) { + case 'a': + break; + + case 'n': + number = strtol(optarg, NULL, 10); + break; + } + } + + if (number >= 0) + return __cmd_dev_list(number, true); + + for (i = 0; i < 255; i++) + __cmd_dev_list(i, false); + + return 0; +} + +static int cmd_dev_help(int argc, char *argv[]) +{ + printf("%s add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] \n", + argv[0]); + printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); + printf("\t -t loop -f backing_file \n"); + printf("\t -t null\n"); + printf("%s del [-n dev_id] -a \n", argv[0]); + printf("\t -a delete all devices -n delete specified device\n"); + printf("%s list [-n dev_id] -a \n", argv[0]); + printf("\t -a list all devices, -n list specified device, default -a \n"); + + return 0; +} + +static int ublk_null_tgt_init(struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + }; + + return 0; +} + +static int ublk_null_queue_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + + ublk_complete_io(q, tag, iod->nr_sectors << 9); + + return 0; +} + +static int loop_queue_tgt_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe = io_uring_get_sqe(&q->ring); + unsigned ublk_op = ublksrv_get_op(iod); + + if (!sqe) + return -ENOMEM; + + switch (ublk_op) { + case UBLK_IO_OP_FLUSH: + io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/, + iod->nr_sectors << 9, + iod->start_sector << 9, + IORING_FSYNC_DATASYNC); + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); + break; + case 
UBLK_IO_OP_WRITE_ZEROES: + case UBLK_IO_OP_DISCARD: + return -ENOTSUP; + case UBLK_IO_OP_READ: + io_uring_prep_read(sqe, 1 /*fds[1]*/, + (void *)iod->addr, + iod->nr_sectors << 9, + iod->start_sector << 9); + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); + break; + case UBLK_IO_OP_WRITE: + io_uring_prep_write(sqe, 1 /*fds[1]*/, + (void *)iod->addr, + iod->nr_sectors << 9, + iod->start_sector << 9); + io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); + break; + default: + return -EINVAL; + } + + q->io_inflight++; + /* bit63 marks us as tgt io */ + sqe->user_data = build_user_data(tag, ublk_op, 0, 1); + + ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag, + iod->op_flags, iod->start_sector, iod->nr_sectors << 9); + return 1; +} + +static int ublk_loop_queue_io(struct ublk_queue *q, int tag) +{ + int queued = loop_queue_tgt_io(q, tag); + + if (queued < 0) + ublk_complete_io(q, tag, queued); + + return 0; +} + +static void ublk_loop_io_done(struct ublk_queue *q, int tag, + const struct io_uring_cqe *cqe) +{ + int cqe_tag = user_data_to_tag(cqe->user_data); + + ublk_assert(tag == cqe_tag); + ublk_complete_io(q, tag, cqe->res); + q->io_inflight--; +} + +static void ublk_loop_tgt_deinit(struct ublk_dev *dev) +{ + fsync(dev->fds[1]); + close(dev->fds[1]); +} + +static int ublk_loop_tgt_init(struct ublk_dev *dev) +{ + static const struct option lo_longopts[] = { + { "file", 1, NULL, 'f' }, + { NULL } + }; + unsigned long long bytes; + char **argv = dev->tgt.argv; + int argc = dev->tgt.argc; + char *file = NULL; + struct stat st; + int fd, opt; + struct ublk_params p = { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, + }, + }; + + while ((opt = getopt_long(argc, argv, "-:f:", + lo_longopts, NULL)) != -1) { + switch (opt) { + case 'f': + file = strdup(optarg); + break; + } + } + + ublk_dbg(UBLK_DBG_DEV, "%s: 
file %s\n", __func__, file); + + if (!file) + return -EINVAL; + + fd = open(file, O_RDWR); + if (fd < 0) { + ublk_err( "%s: backing file %s can't be opened\n", + __func__, file); + return -EBADF; + } + + if (fstat(fd, &st) < 0) { + close(fd); + return -EBADF; + } + + if (S_ISBLK(st.st_mode)) { + unsigned int bs, pbs; + + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) + return -EBADF; + if (ioctl(fd, BLKSSZGET, &bs) != 0) + return -1; + if (ioctl(fd, BLKPBSZGET, &pbs) != 0) + return -1; + p.basic.logical_bs_shift = ilog2(bs); + p.basic.physical_bs_shift = ilog2(pbs); + } else if (S_ISREG(st.st_mode)) { + bytes = st.st_size; + } else { + bytes = 0; + } + + if (fcntl(fd, F_SETFL, O_DIRECT)) { + p.basic.logical_bs_shift = 9; + p.basic.physical_bs_shift = 12; + ublk_log("%s: ublk-loop fallback to buffered IO\n", __func__); + } + + dev->tgt.dev_size = bytes; + p.basic.dev_sectors = bytes >> 9; + dev->fds[1] = fd; + dev->nr_fds += 1; + dev->tgt.params = p; + + return 0; +} + +const struct ublk_tgt_ops tgt_ops_list[] = { + { + .name = "null", + .init_tgt = ublk_null_tgt_init, + .queue_io = ublk_null_queue_io, + }, + + { + .name = "loop", + .init_tgt = ublk_loop_tgt_init, + .deinit_tgt = ublk_loop_tgt_deinit, + .queue_io = ublk_loop_queue_io, + .tgt_io_done = ublk_loop_io_done, + }, +}; + +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) +{ + const struct ublk_tgt_ops *ops; + int i; + + if (name == NULL) + return NULL; + + for (i = 0; sizeof(tgt_ops_list) / sizeof(*ops); i++) + if (strcmp(tgt_ops_list[i].name, name) == 0) + return &tgt_ops_list[i]; + return NULL; +} + +int main(int argc, char *argv[]) +{ + const char *cmd = argv[1]; + int ret = -EINVAL; + + if (argc == 1) + goto out; + + if (!strcmp(cmd, "add")) + ret = cmd_dev_add(argc, argv); + else if (!strcmp(cmd, "del")) + ret = cmd_dev_del(argc, argv); + else if (!strcmp(cmd, "list")) + ret = cmd_dev_list(argc, argv); + else if (!strcmp(cmd, "help")) + ret = cmd_dev_help(argc, argv); +out: + if (ret) + 
cmd_dev_help(argc, argv); + + return ret; +}
Prepare for adding ublk related tests: 1) ublk delete is a synchronous removal, which makes it convenient to catch blkg/queue/disk instance leak issues 2) mini ublk has two builtin targets (null, loop), and loop IO is handled by io_uring, so we can use ublk to cover part of io_uring workloads 3) unlike loop/nbd, ublk doesn't pre-allocate/add disks and always adds/deletes disks dynamically, so it may cover disk plug & unplug tests 4) ublk specific tests: people are starting to use ublk, so it is better to let blktests cover ublk related tests Add mini ublk source for test purposes only, which is easy to use: ./miniublk add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation) -t loop -f backing_file -t null ./miniublk del [-n dev_id] -a -a delete all devices, -n delete specified device ./miniublk list [-n dev_id] -a -a list all devices, -n list specified device, default -a miniublk depends on liburing 2.2; add HAVE_LIBURING for checking whether liburing 2.2 exists, and add HAVE_UBLK_HEADER for checking whether the ublk kernel UAPI header exists. If either of the two dependencies can't be met, simply skip the miniublk target. Also, v6.0 is the first Linux kernel release with ublk. Signed-off-by: Ming Lei <ming.lei@redhat.com> --- src/.gitignore | 1 + src/Makefile | 18 + src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1395 insertions(+) create mode 100644 src/miniublk.c