@@ -119,6 +119,10 @@ int coroutine_fn GRAPH_RDLOCK bdrv_co_zone_report(BlockDriverState *bs,
int coroutine_fn GRAPH_RDLOCK bdrv_co_zone_mgmt(BlockDriverState *bs,
BlockZoneOp op,
int64_t offset, int64_t len);
+/*
+ * Zone append: write @qiov into a zone of a zoned device at a position
+ * chosen by the driver.
+ *
+ * The request is first checked with bdrv_check_qiov_request() using *@offset
+ * and qiov->size. Returns -ENOTSUP when @bs has no driver, the driver lacks
+ * a bdrv_co_zone_append callback, or bs->bl.zoned is BLK_Z_NONE; otherwise
+ * returns the result of the driver callback.
+ *
+ * @offset is passed by pointer so that the driver can report where the data
+ * landed: the host_device driver requires *@offset to be zone-size aligned
+ * on entry and, on success for a non-conventional zone, stores into it the
+ * zone's write pointer that was used as the write position.
+ */
+int coroutine_fn GRAPH_RDLOCK bdrv_co_zone_append(BlockDriverState *bs,
+ int64_t *offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
int bdrv_block_status(BlockDriverState *bs, int64_t offset,
@@ -722,6 +722,9 @@ struct BlockDriver {
BlockZoneDescriptor *zones);
int coroutine_fn (*bdrv_co_zone_mgmt)(BlockDriverState *bs, BlockZoneOp op,
int64_t offset, int64_t len);
+ int coroutine_fn (*bdrv_co_zone_append)(BlockDriverState *bs,
+ int64_t *offset, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
/* removable device specific */
bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_is_inserted)(
@@ -30,6 +30,7 @@
#define QEMU_AIO_TRUNCATE 0x0080
#define QEMU_AIO_ZONE_REPORT 0x0100
#define QEMU_AIO_ZONE_MGMT 0x0200
+#define QEMU_AIO_ZONE_APPEND 0x0400
#define QEMU_AIO_TYPE_MASK \
(QEMU_AIO_READ | \
QEMU_AIO_WRITE | \
@@ -40,7 +41,8 @@
QEMU_AIO_COPY_RANGE | \
QEMU_AIO_TRUNCATE | \
QEMU_AIO_ZONE_REPORT | \
- QEMU_AIO_ZONE_MGMT)
+ QEMU_AIO_ZONE_MGMT | \
+ QEMU_AIO_ZONE_APPEND)
/* AIO flags */
#define QEMU_AIO_MISALIGNED 0x1000
@@ -53,6 +53,9 @@ BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len,
BlockCompletionFunc *cb, void *opaque);
+/*
+ * Asynchronous zone append on @blk. Runs blk_co_zone_append() in a newly
+ * created coroutine and returns the request's BlockAIOCB.
+ *
+ * @offset is kept as a pointer and handed to the coroutine, so it must stay
+ * valid until the request has completed. @cb and @opaque are passed to
+ * blk_aio_get(); presumably @cb is invoked with @opaque on completion, as
+ * for the other blk_aio_*() requests -- confirm against blk_aio_complete().
+ */
+BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
BlockCompletionFunc *cb, void *opaque);
void blk_aio_cancel_async(BlockAIOCB *acb);
@@ -208,6 +211,12 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len);
int co_wrapper_mixed blk_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len);
+/*
+ * Coroutine version of zone append on @blk. Returns -ENOMEDIUM when
+ * blk_is_available() fails, otherwise the result of bdrv_co_zone_append()
+ * on blk_bs(blk). @offset, @qiov and @flags are forwarded unchanged.
+ */
+int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+/*
+ * co_wrapper_mixed variant of blk_co_zone_append() taking the same arguments.
+ * NOTE(review): presumably generated by the coroutine wrapper machinery and
+ * callable from both coroutine and non-coroutine context -- confirm.
+ */
+int co_wrapper_mixed blk_zone_append(BlockBackend *blk, int64_t *offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
int co_wrapper_mixed blk_pdiscard(BlockBackend *blk, int64_t offset,
int64_t bytes);
@@ -1929,6 +1929,45 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
return &acb->common;
}
+/*
+ * Coroutine entry point used by blk_aio_zone_append().
+ *
+ * @opaque is the BlkAioEmAIOCB prepared by blk_aio_zone_append(). The result
+ * of blk_co_zone_append() is stored in the embedded BlkRwCo before the
+ * request is handed to blk_aio_complete().
+ */
+static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+    /*
+     * blk_aio_zone_append() stored the caller's offset pointer in the
+     * integer field ->bytes; convert it back to a pointer here.
+     */
+    int64_t *offset = (int64_t *)(uintptr_t)acb->bytes;
+
+    rwco->ret = blk_co_zone_append(rwco->blk, offset,
+                                   rwco->iobuf, rwco->flags);
+    blk_aio_complete(acb);
+}
+
+/*
+ * Start an asynchronous zone append on @blk.
+ *
+ * An AIOCB is obtained from blk_aio_get() with @cb/@opaque, and
+ * blk_co_zone_append() is run through blk_aio_zone_append_entry() in a new
+ * coroutine entered in the AioContext of @blk. @offset is only stored as a
+ * pointer here, so it must remain valid until the coroutine has used it.
+ *
+ * Returns the common part of the AIOCB.
+ */
+BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
+                                QEMUIOVector *qiov, BdrvRequestFlags flags,
+                                BlockCompletionFunc *cb, void *opaque)
+{
+    BlkAioEmAIOCB *acb;
+    Coroutine *co;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+
+    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+    acb->rwco = (BlkRwCo) {
+        .blk = blk,
+        .iobuf = qiov,
+        .flags = flags,
+        .ret = NOT_DONE,
+    };
+    /* The offset pointer travels to the coroutine inside ->bytes. */
+    acb->bytes = (int64_t)(uintptr_t)offset;
+    acb->has_returned = false;
+
+    co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
+    aio_co_enter(blk_get_aio_context(blk), co);
+
+    acb->has_returned = true;
+    if (acb->rwco.ret == NOT_DONE) {
+        /* No result yet: nothing to schedule from here. */
+        return &acb->common;
+    }
+
+    /* A result is already available: complete from a bottom half. */
+    replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+                                     blk_aio_complete_bh, acb);
+    return &acb->common;
+}
+
/*
* Send a zone_report command.
* offset is a byte offset from the start of the device. No alignment
@@ -1982,6 +2021,28 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
return ret;
}
+/*
+ * Send a zone_append command.
+ *
+ * The request is counted as in flight on @blk for its whole duration.
+ * Returns -ENOMEDIUM if blk_is_available() reports that @blk cannot be used,
+ * otherwise whatever bdrv_co_zone_append() returns for blk_bs(blk).
+ * @offset is forwarded unchanged to bdrv_co_zone_append().
+ */
+int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
+                                    QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    int ret;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
+
+    if (blk_is_available(blk)) {
+        ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
+    } else {
+        ret = -ENOMEDIUM;
+    }
+
+    /* Single exit: the in-flight count is dropped on both paths. */
+    blk_dec_in_flight(blk);
+    return ret;
+}
+
void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
@@ -160,6 +160,7 @@ typedef struct BDRVRawState {
bool has_write_zeroes:1;
bool use_linux_aio:1;
bool use_linux_io_uring:1;
+ int64_t *offset; /* offset of zone append operation */
int page_cache_inconsistent; /* errno from fdatasync failure */
bool has_fallocate;
bool needs_alignment;
@@ -1702,7 +1703,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
ssize_t len;
len = RETRY_ON_EINTR(
- (aiocb->aio_type & QEMU_AIO_WRITE) ?
+ (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ?
qemu_pwritev(aiocb->aio_fildes,
aiocb->io.iov,
aiocb->io.niov,
@@ -1731,7 +1732,7 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
ssize_t len;
while (offset < aiocb->aio_nbytes) {
- if (aiocb->aio_type & QEMU_AIO_WRITE) {
+ if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
len = pwrite(aiocb->aio_fildes,
(const char *)buf + offset,
aiocb->aio_nbytes - offset,
@@ -1824,7 +1825,7 @@ static int handle_aiocb_rw(void *opaque)
}
nbytes = handle_aiocb_rw_linear(aiocb, buf);
- if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
+ if (!(aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))) {
char *p = buf;
size_t count = aiocb->aio_nbytes, copy;
int i;
@@ -2457,8 +2458,12 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
if (fd_open(bs) < 0)
return -EIO;
#if defined(CONFIG_BLKZONED)
- if (type & QEMU_AIO_WRITE && bs->wps) {
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) {
qemu_co_mutex_lock(&bs->wps->colock);
+ if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) {
+ int index = offset / bs->bl.zone_size;
+ offset = bs->wps->wp[index];
+ }
}
#endif
@@ -2506,9 +2511,13 @@ out:
{
BlockZoneWps *wps = bs->wps;
if (ret == 0) {
- if (type & QEMU_AIO_WRITE && wps && bs->bl.zone_size) {
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
+ && wps && bs->bl.zone_size) {
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
if (!BDRV_ZT_IS_CONV(*wp)) {
+ if (type & QEMU_AIO_ZONE_APPEND) {
+ *s->offset = *wp;
+ }
/* Advance the wp if needed */
if (offset + bytes > *wp) {
*wp = offset + bytes;
@@ -2516,12 +2525,12 @@ out:
}
}
} else {
- if (type & QEMU_AIO_WRITE) {
+ if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
update_zones_wp(bs, s->fd, 0, 1);
}
}
- if (type & QEMU_AIO_WRITE && wps) {
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) {
qemu_co_mutex_unlock(&wps->colock);
}
}
@@ -3519,6 +3528,40 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
}
#endif
+#if defined(CONFIG_BLKZONED)
+/*
+ * Zone append for host zoned block devices.
+ *
+ * Validates the request and then issues it through raw_co_prw() with type
+ * QEMU_AIO_ZONE_APPEND:
+ *  - @flags must be 0 (asserted);
+ *  - *@offset must be a multiple of bs->bl.zone_size (the check uses a
+ *    "size - 1" mask, so it assumes a power-of-two zone size);
+ *  - the length of every element of @qiov must be a multiple of
+ *    bs->bl.write_granularity (same mask-based check).
+ *
+ * Returns -EINVAL if a check fails, otherwise the result of raw_co_prw().
+ *
+ * The @offset pointer is saved in the driver state (s->offset) for
+ * raw_co_prw() to write through.
+ * NOTE(review): s->offset is per-device state and is written before
+ * raw_co_prw() takes wps->colock, so two zone appends in flight at once
+ * would appear to overwrite each other's pointer -- confirm that callers
+ * serialize requests, or pass the pointer down explicitly.
+ */
+static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
+                                           int64_t *offset,
+                                           QEMUIOVector *qiov,
+                                           BdrvRequestFlags flags)
+{
+    assert(flags == 0);
+    BDRVRawState *s = bs->opaque;
+    int64_t zone_mask = bs->bl.zone_size - 1;
+    int64_t total = 0;
+    int idx;
+
+    s->offset = offset;
+
+    if ((*offset & zone_mask) != 0) {
+        error_report("sector offset %" PRId64 " is not aligned to zone size "
+                     "%" PRId32 "", *offset / 512, bs->bl.zone_size / 512);
+        return -EINVAL;
+    }
+
+    int64_t granularity = bs->bl.write_granularity;
+    int64_t granularity_mask = granularity - 1;
+
+    /* Check each segment and accumulate the total request length. */
+    for (idx = 0; idx < qiov->niov; idx++) {
+        int64_t seg_len = qiov->iov[idx].iov_len;
+
+        if ((seg_len & granularity_mask) != 0) {
+            error_report("len of IOVector[%d] %" PRId64 " is not aligned to "
+                         "block size %" PRId64 "", idx, seg_len, granularity);
+            return -EINVAL;
+        }
+        total += seg_len;
+    }
+
+    return raw_co_prw(bs, *offset, total, qiov, QEMU_AIO_ZONE_APPEND);
+}
+#endif
+
static coroutine_fn int
raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
bool blkdev)
@@ -4280,6 +4323,7 @@ static BlockDriver bdrv_host_device = {
/* zone management operations */
.bdrv_co_zone_report = raw_co_zone_report,
.bdrv_co_zone_mgmt = raw_co_zone_mgmt,
+ .bdrv_co_zone_append = raw_co_zone_append,
#endif
};
@@ -3156,6 +3156,33 @@ out:
return co.ret;
}
+/*
+ * Generic block-layer entry point for zone append.
+ *
+ * The request described by *@offset and @qiov is first validated with
+ * bdrv_check_qiov_request(); a negative result is returned as is. The call
+ * is then forwarded to the driver's bdrv_co_zone_append callback while @bs
+ * is marked as having a request in flight.
+ *
+ * Returns -ENOTSUP when @bs has no driver, the driver does not implement
+ * the callback, or bs->bl.zoned is BLK_Z_NONE; otherwise returns the
+ * callback's result.
+ */
+int coroutine_fn bdrv_co_zone_append(BlockDriverState *bs, int64_t *offset,
+                                     QEMUIOVector *qiov,
+                                     BdrvRequestFlags flags)
+{
+    BlockDriver *drv = bs->drv;
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    int ret;
+    IO_CODE();
+
+    ret = bdrv_check_qiov_request(*offset, qiov->size, qiov, 0, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    bdrv_inc_in_flight(bs);
+    if (drv && drv->bdrv_co_zone_append && bs->bl.zoned != BLK_Z_NONE) {
+        co.ret = drv->bdrv_co_zone_append(bs, offset, qiov, flags);
+    } else {
+        co.ret = -ENOTSUP;
+    }
+    bdrv_dec_in_flight(bs);
+
+    return co.ret;
+}
+
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
IO_CODE();
@@ -350,6 +350,10 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
luringcb->qiov->niov, offset);
break;
+ case QEMU_AIO_ZONE_APPEND:
+ io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
+ luringcb->qiov->niov, offset);
+ break;
case QEMU_AIO_READ:
io_uring_prep_readv(sqes, fd, luringcb->qiov->iov,
luringcb->qiov->niov, offset);
@@ -394,6 +394,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
case QEMU_AIO_WRITE:
io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
break;
+ case QEMU_AIO_ZONE_APPEND:
+ io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+ break;
case QEMU_AIO_READ:
io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
break;
@@ -332,6 +332,13 @@ raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
return bdrv_co_zone_mgmt(bs->file->bs, op, offset, len);
}
+/*
+ * Zone append for the raw format driver: pass the request through
+ * unchanged to the node attached as bs->file.
+ */
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_zone_append(BlockDriverState *bs, int64_t *offset, QEMUIOVector *qiov,
+                   BdrvRequestFlags flags)
+{
+    BlockDriverState *file_bs = bs->file->bs;
+
+    return bdrv_co_zone_append(file_bs, offset, qiov, flags);
+}
+
static int64_t coroutine_fn GRAPH_RDLOCK
raw_co_getlength(BlockDriverState *bs)
{
@@ -637,6 +644,7 @@ BlockDriver bdrv_raw = {
.bdrv_co_pdiscard = &raw_co_pdiscard,
.bdrv_co_zone_report = &raw_co_zone_report,
.bdrv_co_zone_mgmt = &raw_co_zone_mgmt,
+ .bdrv_co_zone_append = &raw_co_zone_append,
.bdrv_co_block_status = &raw_co_block_status,
.bdrv_co_copy_range_from = &raw_co_copy_range_from,
.bdrv_co_copy_range_to = &raw_co_copy_range_to,