@@ -29,6 +29,8 @@ static const VirtIOFeature feature_sizes[] = {
.end = endof(struct virtio_blk_config, discard_sector_alignment)},
{.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES,
.end = endof(struct virtio_blk_config, write_zeroes_may_unmap)},
+ {.flags = 1ULL << VIRTIO_BLK_F_ZONED,
+ .end = endof(struct virtio_blk_config, zoned)},
{}
};
@@ -17,6 +17,7 @@
#include "qemu/module.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
+#include "block/block_int.h"
#include "trace.h"
#include "hw/block/block.h"
#include "hw/qdev-properties.h"
@@ -601,6 +602,335 @@ err:
return err_status;
}
+typedef struct ZoneCmdData {
+ VirtIOBlockReq *req;
+ struct iovec *in_iov;
+ unsigned in_num;
+ union {
+ struct {
+ unsigned int nr_zones;
+ BlockZoneDescriptor *zones;
+ } zone_report_data;
+ struct {
+ int64_t offset;
+ } zone_append_data;
+ };
+} ZoneCmdData;
+
+/*
+ * check zoned_request: error checking before issuing requests. If all checks
+ * passed, return true.
+ * append: true if only zone append requests issued.
+ */
+static bool check_zoned_request(VirtIOBlock *s, int64_t offset, int64_t len,
+ bool append, uint8_t *status) {
+ BlockDriverState *bs = blk_bs(s->blk);
+ int index;
+
+ if (!virtio_has_feature(s->host_features, VIRTIO_BLK_F_ZONED)) {
+ *status = VIRTIO_BLK_S_UNSUPP;
+ return false;
+ }
+
+ if (offset < 0 || len < 0 || len > (bs->total_sectors << BDRV_SECTOR_BITS)
+ || offset > (bs->total_sectors << BDRV_SECTOR_BITS) - len) {
+ *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ return false;
+ }
+
+ if (append) {
+ if (bs->bl.write_granularity) {
+ if ((offset % bs->bl.write_granularity) != 0) {
+ *status = VIRTIO_BLK_S_ZONE_UNALIGNED_WP;
+ return false;
+ }
+ }
+
+ index = offset / bs->bl.zone_size;
+ if (BDRV_ZT_IS_CONV(bs->wps->wp[index])) {
+ *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ return false;
+ }
+
+ if (len / 512 > bs->bl.max_append_sectors) {
+ if (bs->bl.max_append_sectors == 0) {
+ *status = VIRTIO_BLK_S_UNSUPP;
+ } else {
+ *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ }
+ return false;
+ }
+ }
+ return true;
+}
+
+static void virtio_blk_zone_report_complete(void *opaque, int ret)
+{
+ ZoneCmdData *data = opaque;
+ VirtIOBlockReq *req = data->req;
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
+ struct iovec *in_iov = data->in_iov;
+ unsigned in_num = data->in_num;
+ int64_t zrp_size, n, j = 0;
+ int64_t nz = data->zone_report_data.nr_zones;
+ int8_t err_status = VIRTIO_BLK_S_OK;
+
+ if (ret) {
+ err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ goto out;
+ }
+
+ struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) {
+ .nr_zones = cpu_to_le64(nz),
+ };
+ zrp_size = sizeof(struct virtio_blk_zone_report)
+ + sizeof(struct virtio_blk_zone_descriptor) * nz;
+ n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr));
+ if (n != sizeof(zrp_hdr)) {
+ virtio_error(vdev, "Driver provided input buffer that is too small!");
+ err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ goto out;
+ }
+
+ for (size_t i = sizeof(zrp_hdr); i < zrp_size;
+ i += sizeof(struct virtio_blk_zone_descriptor), ++j) {
+ struct virtio_blk_zone_descriptor desc =
+ (struct virtio_blk_zone_descriptor) {
+ .z_start = cpu_to_le64(data->zone_report_data.zones[j].start
+ >> BDRV_SECTOR_BITS),
+ .z_cap = cpu_to_le64(data->zone_report_data.zones[j].cap
+ >> BDRV_SECTOR_BITS),
+ .z_wp = cpu_to_le64(data->zone_report_data.zones[j].wp
+ >> BDRV_SECTOR_BITS),
+ };
+
+ switch (data->zone_report_data.zones[j].type) {
+ case BLK_ZT_CONV:
+ desc.z_type = VIRTIO_BLK_ZT_CONV;
+ break;
+ case BLK_ZT_SWR:
+ desc.z_type = VIRTIO_BLK_ZT_SWR;
+ break;
+ case BLK_ZT_SWP:
+ desc.z_type = VIRTIO_BLK_ZT_SWP;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ switch (data->zone_report_data.zones[j].state) {
+ case BLK_ZS_RDONLY:
+ desc.z_state = VIRTIO_BLK_ZS_RDONLY;
+ break;
+ case BLK_ZS_OFFLINE:
+ desc.z_state = VIRTIO_BLK_ZS_OFFLINE;
+ break;
+ case BLK_ZS_EMPTY:
+ desc.z_state = VIRTIO_BLK_ZS_EMPTY;
+ break;
+ case BLK_ZS_CLOSED:
+ desc.z_state = VIRTIO_BLK_ZS_CLOSED;
+ break;
+ case BLK_ZS_FULL:
+ desc.z_state = VIRTIO_BLK_ZS_FULL;
+ break;
+ case BLK_ZS_EOPEN:
+ desc.z_state = VIRTIO_BLK_ZS_EOPEN;
+ break;
+ case BLK_ZS_IOPEN:
+ desc.z_state = VIRTIO_BLK_ZS_IOPEN;
+ break;
+ case BLK_ZS_NOT_WP:
+ desc.z_state = VIRTIO_BLK_ZS_NOT_WP;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* TODO: it takes O(n^2) time complexity. Optimizations required. */
+ n = iov_from_buf(in_iov, in_num, i, &desc, sizeof(desc));
+ if (n != sizeof(desc)) {
+ virtio_error(vdev, "Driver provided input buffer "
+ "for descriptors that is too small!");
+ err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ }
+ }
+
+out:
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
+ g_free(data->zone_report_data.zones);
+ g_free(data);
+}
+
+static void virtio_blk_handle_zone_report(VirtIOBlockReq *req,
+ struct iovec *in_iov,
+ unsigned in_num)
+{
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ unsigned int nr_zones;
+ ZoneCmdData *data;
+ int64_t zone_size, offset;
+ uint8_t err_status;
+
+ if (req->in_len < sizeof(struct virtio_blk_inhdr) +
+ sizeof(struct virtio_blk_zone_report) +
+ sizeof(struct virtio_blk_zone_descriptor)) {
+ virtio_error(vdev, "in buffer too small for zone report");
+ return;
+ }
+
+ /* start byte offset of the zone report */
+ offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
+ if (!check_zoned_request(s, offset, 0, false, &err_status)) {
+ goto out;
+ }
+ nr_zones = (req->in_len - sizeof(struct virtio_blk_inhdr) -
+ sizeof(struct virtio_blk_zone_report)) /
+ sizeof(struct virtio_blk_zone_descriptor);
+
+ zone_size = sizeof(BlockZoneDescriptor) * nr_zones;
+ data = g_malloc(sizeof(ZoneCmdData));
+ data->req = req;
+ data->in_iov = in_iov;
+ data->in_num = in_num;
+ data->zone_report_data.nr_zones = nr_zones;
+ data->zone_report_data.zones = g_malloc(zone_size),
+
+ blk_aio_zone_report(s->blk, offset, &data->zone_report_data.nr_zones,
+ data->zone_report_data.zones,
+ virtio_blk_zone_report_complete, data);
+ return;
+out:
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+}
+
+static void virtio_blk_zone_mgmt_complete(void *opaque, int ret)
+{
+ VirtIOBlockReq *req = opaque;
+ VirtIOBlock *s = req->dev;
+ int8_t err_status = VIRTIO_BLK_S_OK;
+
+ if (ret) {
+ err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ }
+
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
+}
+
+static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op)
+{
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ BlockDriverState *bs = blk_bs(s->blk);
+ int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
+ uint64_t len;
+ uint64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
+ uint8_t err_status = VIRTIO_BLK_S_OK;
+
+ uint32_t type = virtio_ldl_p(vdev, &req->out.type);
+ if (type == VIRTIO_BLK_T_ZONE_RESET_ALL) {
+ /* Entire drive capacity */
+ offset = 0;
+ len = capacity;
+ } else {
+ if (bs->bl.zone_size > capacity - offset) {
+ /* The zoned device allows the last smaller zone. */
+ len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1);
+ } else {
+ len = bs->bl.zone_size;
+ }
+ }
+
+ if (!check_zoned_request(s, offset, len, false, &err_status)) {
+ goto out;
+ }
+
+ blk_aio_zone_mgmt(s->blk, op, offset, len,
+ virtio_blk_zone_mgmt_complete, req);
+
+ return 0;
+out:
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+ return err_status;
+}
+
+static void virtio_blk_zone_append_complete(void *opaque, int ret)
+{
+ ZoneCmdData *data = opaque;
+ VirtIOBlockReq *req = data->req;
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
+ int64_t append_sector, n;
+ uint8_t err_status = VIRTIO_BLK_S_OK;
+
+ if (ret) {
+ err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ goto out;
+ }
+
+ virtio_stq_p(vdev, &append_sector,
+ data->zone_append_data.offset >> BDRV_SECTOR_BITS);
+ n = iov_from_buf(data->in_iov, data->in_num, 0, &append_sector,
+ sizeof(append_sector));
+ if (n != sizeof(append_sector)) {
+ virtio_error(vdev, "Driver provided input buffer less than size of "
+ "append_sector");
+ err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
+ goto out;
+ }
+
+out:
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
+ g_free(data);
+}
+
+static int virtio_blk_handle_zone_append(VirtIOBlockReq *req,
+ struct iovec *out_iov,
+ struct iovec *in_iov,
+ uint64_t out_num,
+ unsigned in_num) {
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ uint8_t err_status = VIRTIO_BLK_S_OK;
+
+ int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
+ int64_t len = iov_size(out_iov, out_num);
+
+ if (!check_zoned_request(s, offset, len, true, &err_status)) {
+ goto out;
+ }
+
+ ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData));
+ data->req = req;
+ data->in_iov = in_iov;
+ data->in_num = in_num;
+ data->zone_append_data.offset = offset;
+ qemu_iovec_init_external(&req->qiov, out_iov, out_num);
+ blk_aio_zone_append(s->blk, &data->zone_append_data.offset, &req->qiov, 0,
+ virtio_blk_zone_append_complete, data);
+ return 0;
+
+out:
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
+ return err_status;
+}
+
static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
@@ -687,6 +1017,24 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
case VIRTIO_BLK_T_FLUSH:
virtio_blk_handle_flush(req, mrb);
break;
+ case VIRTIO_BLK_T_ZONE_REPORT:
+ virtio_blk_handle_zone_report(req, in_iov, in_num);
+ break;
+ case VIRTIO_BLK_T_ZONE_OPEN:
+ virtio_blk_handle_zone_mgmt(req, BLK_ZO_OPEN);
+ break;
+ case VIRTIO_BLK_T_ZONE_CLOSE:
+ virtio_blk_handle_zone_mgmt(req, BLK_ZO_CLOSE);
+ break;
+ case VIRTIO_BLK_T_ZONE_FINISH:
+ virtio_blk_handle_zone_mgmt(req, BLK_ZO_FINISH);
+ break;
+ case VIRTIO_BLK_T_ZONE_RESET:
+ virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
+ break;
+ case VIRTIO_BLK_T_ZONE_RESET_ALL:
+ virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
+ break;
case VIRTIO_BLK_T_SCSI_CMD:
virtio_blk_handle_scsi(req);
break;
@@ -705,6 +1053,14 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
virtio_blk_free_request(req);
break;
}
+ case VIRTIO_BLK_T_ZONE_APPEND & ~VIRTIO_BLK_T_OUT:
+ /*
+ * Passing out_iov/out_num and in_iov/in_num is not safe
+ * to access req->elem.out_sg directly because it may be
+ * modified by virtio_blk_handle_request().
+ */
+ virtio_blk_handle_zone_append(req, out_iov, in_iov, out_num, in_num);
+ break;
/*
* VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
* VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
@@ -890,6 +1246,7 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
BlockConf *conf = &s->conf.conf;
+ BlockDriverState *bs = blk_bs(s->blk);
struct virtio_blk_config blkcfg;
uint64_t capacity;
int64_t length;
@@ -954,6 +1311,30 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
blkcfg.write_zeroes_may_unmap = 1;
virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
}
+ if (bs->bl.zoned != BLK_Z_NONE) {
+ switch (bs->bl.zoned) {
+ case BLK_Z_HM:
+ blkcfg.zoned.model = VIRTIO_BLK_Z_HM;
+ break;
+ case BLK_Z_HA:
+ blkcfg.zoned.model = VIRTIO_BLK_Z_HA;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ virtio_stl_p(vdev, &blkcfg.zoned.zone_sectors,
+ bs->bl.zone_size / 512);
+ virtio_stl_p(vdev, &blkcfg.zoned.max_active_zones,
+ bs->bl.max_active_zones);
+ virtio_stl_p(vdev, &blkcfg.zoned.max_open_zones,
+ bs->bl.max_open_zones);
+ virtio_stl_p(vdev, &blkcfg.zoned.write_granularity, blk_size);
+ virtio_stl_p(vdev, &blkcfg.zoned.max_append_sectors,
+ bs->bl.max_append_sectors);
+ } else {
+ blkcfg.zoned.model = VIRTIO_BLK_Z_NONE;
+ }
memcpy(config, &blkcfg, s->config_size);
}
@@ -1163,6 +1544,14 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
return;
}
+ BlockDriverState *bs = blk_bs(conf->conf.blk);
+ if (bs->bl.zoned != BLK_Z_NONE) {
+ virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
+ if (bs->bl.zoned == BLK_Z_HM) {
+ virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD);
+ }
+ }
+
if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
(!conf->max_discard_sectors ||
conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
@@ -176,6 +176,8 @@ static const qmp_virtio_feature_map_t virtio_blk_feature_map[] = {
"VIRTIO_BLK_F_DISCARD: Discard command supported"),
FEATURE_ENTRY(VIRTIO_BLK_F_WRITE_ZEROES, \
"VIRTIO_BLK_F_WRITE_ZEROES: Write zeroes command supported"),
+ FEATURE_ENTRY(VIRTIO_BLK_F_ZONED, \
+ "VIRTIO_BLK_F_ZONED: Zoned block devices"),
#ifndef VIRTIO_BLK_NO_LEGACY
FEATURE_ENTRY(VIRTIO_BLK_F_BARRIER, \
"VIRTIO_BLK_F_BARRIER: Request barriers supported"),