@@ -147,6 +147,30 @@ out:
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
+static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
+{
+ VirtIOBlockReq *req = opaque;
+ VirtIOBlock *s = req->dev;
+ bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) &
+ ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES;
+
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+ if (ret) {
+ if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
+ goto out;
+ }
+ }
+
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ if (is_write_zeroes) {
+ block_acct_done(blk_get_stats(s->blk), &req->acct);
+ }
+ virtio_blk_free_request(req);
+
+out:
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
+}
+
#ifdef __linux__
typedef struct {
@@ -480,6 +504,84 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
return true;
}
+static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req,
+ struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes)
+{
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ uint64_t sector;
+ uint32_t num_sectors, flags, max_sectors;
+ uint8_t err_status;
+ int bytes;
+
+ sector = virtio_ldq_p(vdev, &dwz_hdr->sector);
+ num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors);
+ flags = virtio_ldl_p(vdev, &dwz_hdr->flags);
+ max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors :
+ s->conf.max_discard_sectors;
+
+ /*
+ * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, this check
+ * make us sure that "num_sectors << BDRV_SECTOR_BITS" can fit in
+ * the integer variable.
+ */
+ if (unlikely(num_sectors > max_sectors)) {
+ err_status = VIRTIO_BLK_S_IOERR;
+ goto err;
+ }
+
+ bytes = num_sectors << BDRV_SECTOR_BITS;
+
+ if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) {
+ err_status = VIRTIO_BLK_S_IOERR;
+ goto err;
+ }
+
+ /*
+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
+ * and write zeroes commands if any unknown flag is set.
+ */
+ if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+ err_status = VIRTIO_BLK_S_UNSUPP;
+ goto err;
+ }
+
+ if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */
+ int blk_aio_flags = 0;
+
+ if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+ blk_aio_flags |= BDRV_REQ_MAY_UNMAP;
+ }
+
+ block_acct_start(blk_get_stats(s->blk), &req->acct, bytes,
+ BLOCK_ACCT_WRITE);
+
+ blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS,
+ bytes, blk_aio_flags,
+ virtio_blk_discard_write_zeroes_complete, req);
+ } else { /* VIRTIO_BLK_T_DISCARD */
+ /*
+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
+ * discard commands if the unmap flag is set.
+ */
+ if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+ err_status = VIRTIO_BLK_S_UNSUPP;
+ goto err;
+ }
+
+ blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes,
+ virtio_blk_discard_write_zeroes_complete, req);
+ }
+
+ return VIRTIO_BLK_S_OK;
+
+err:
+ if (is_write_zeroes) {
+ block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE);
+ }
+ return err_status;
+}
+
static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
@@ -584,6 +686,47 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
virtio_blk_free_request(req);
break;
}
+ /*
+ * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
+ * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
+ * so we must mask it for these requests, then we will check if it is set.
+ */
+ case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
+ case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
+ {
+ struct virtio_blk_discard_write_zeroes dwz_hdr;
+ size_t out_len = iov_size(out_iov, out_num);
+ bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) ==
+ VIRTIO_BLK_T_WRITE_ZEROES;
+ uint8_t err_status;
+
+ /*
+ * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains
+ * more than one segment.
+ */
+ if (unlikely(!(type & VIRTIO_BLK_T_OUT) ||
+ out_len > sizeof(dwz_hdr))) {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+ virtio_blk_free_request(req);
+ return 0;
+ }
+
+ if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
+ sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
+ virtio_error(vdev, "virtio-blk discard/write_zeroes header"
+ " too short");
+ return -1;
+ }
+
+ err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr,
+ is_write_zeroes);
+ if (err_status != VIRTIO_BLK_S_OK) {
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+ }
+
+ break;
+ }
default:
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
virtio_blk_free_request(req);
@@ -763,6 +906,24 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
blkcfg.alignment_offset = 0;
blkcfg.wce = blk_enable_write_cache(s->blk);
virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) {
+ virtio_stl_p(vdev, &blkcfg.max_discard_sectors,
+ s->conf.max_discard_sectors);
+ virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
+ blk_size >> BDRV_SECTOR_BITS);
+ /*
+ * We support only one segment per request since multiple segments
+ * are not widely used and there are no userspace APIs that allow
+ * applications to submit multiple segments in a single call.
+ */
+ virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
+ }
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) {
+ virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
+ s->conf.max_write_zeroes_sectors);
+ blkcfg.write_zeroes_may_unmap = 1;
+ virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
+ }
memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
}
@@ -954,6 +1115,24 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
return;
}
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
+ (!conf->max_discard_sectors ||
+ conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
+ error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")"
+ ", must be between 1 and %lu",
+ conf->max_discard_sectors, BDRV_REQUEST_MAX_SECTORS);
+ return;
+ }
+
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) &&
+ (!conf->max_write_zeroes_sectors ||
+ conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) {
+ error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32
+ "), must be between 1 and %lu",
+ conf->max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS);
+ return;
+ }
+
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
sizeof(struct virtio_blk_config));
@@ -1030,6 +1209,10 @@ static Property virtio_blk_properties[] = {
VIRTIO_BLK_F_DISCARD, true),
DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features,
VIRTIO_BLK_F_WRITE_ZEROES, true),
+ DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock,
+ conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS),
+ DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock,
+ conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS),
DEFINE_PROP_END_OF_LIST(),
};
@@ -38,6 +38,8 @@ struct VirtIOBlkConf
uint32_t request_merging;
uint16_t num_queues;
uint16_t queue_size;
+ uint32_t max_discard_sectors;
+ uint32_t max_write_zeroes_sectors;
};
struct VirtIOBlockDataPlane;