diff mbox series

[RFC,5/5] hw/nvme: make ZDED persistent

Message ID 20230816070842.5423-2-faithilikerun@gmail.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Sam Li Aug. 16, 2023, 7:08 a.m. UTC
Zone descriptor extension data (ZDED) is not persistent across QEMU
restarts. The zone descriptor extension valid bit (ZDEV) is part of
the zone attributes and is set to one when ZDED is associated with
the zone.

With a qcow2-ZNS file as the backing file, the NVMe ZNS device stores
the zone attributes in the eight bits that follow the zoned bit of each
zone's write pointer. The ZDED is stored as part of the zoned metadata,
in the same way as the write pointers.

Signed-off-by: Sam Li <faithilikerun@gmail.com>
---
 block/qcow2.c                | 44 +++++++++++++++++++++++++++++++++++-
 hw/nvme/ctrl.c               |  6 +----
 include/block/block-common.h |  1 +
 3 files changed, 45 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/block/qcow2.c b/block/qcow2.c
index 5a038792f1..ac5ecef559 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -25,6 +25,7 @@ 
 #include "qemu/osdep.h"
 
 #include "block/qdict.h"
+#include "block/nvme.h"
 #include "sysemu/block-backend.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
@@ -214,6 +215,17 @@  static inline void qcow2_set_wp(uint64_t *wp, BlockZoneState zs)
     *wp = addr;
 }
 
+static inline void qcow2_set_za(uint64_t *wp, uint8_t za)
+{
+    /*
+     * OR the one-byte zone attribute @za into bits 51..58 of *wp.
+     * Bits that are already set in that range are left untouched.
+     */
+    uint64_t addr = *wp;
+    addr |= ((uint64_t)za << 51);
+    *wp = addr;
+}
+
 /*
  * File wp tracking: reset zone, finish zone and append zone can
  * change the value of write pointer. All zone operations will change
@@ -308,7 +320,7 @@  static int qcow2_check_open(BlockDriverState *bs)
 
 /*
  * The zoned device has limited zone resources of open, closed, active
- * zones.
+ * zones. Check if we can manage a zone without exceeding those limits.
  */
 static int qcow2_check_zone_resources(BlockDriverState *bs,
                                       BlockZoneState zs)
@@ -4801,6 +4813,33 @@  unlock:
     return ret;
 }
 
+/* Set ZDEV on empty zone @index; every exit path drops wps->colock. */
+static int qcow2_zns_set_zded(BlockDriverState *bs, uint32_t index)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int ret = NVME_ZONE_INVAL_TRANSITION; /* result if the zone is not empty */
+    qemu_co_mutex_lock(&s->wps->colock);
+    uint64_t *wp = &s->wps->wp[index];
+    BlockZoneState zs = qcow2_get_zs(*wp);
+    if (zs != BLK_ZS_EMPTY) {
+        goto unlock;
+    }
+    ret = qcow2_check_zone_resources(bs, zs);
+    if (ret < 0) {
+        goto unlock;
+    }
+    qcow2_set_za(wp, NVME_ZA_ZD_EXT_VALID);
+    ret = qcow2_write_wp_at(bs, wp, index, BLK_ZO_CLOSE);
+    if (ret < 0) {
+        error_report("Failed to set zone extension at 0x%" PRIx64 "", *wp);
+        goto unlock;
+    }
+    s->nr_zones_closed++; /* counted only after the write succeeded */
+unlock:
+    qemu_co_mutex_unlock(&s->wps->colock);
+    return ret;
+}
+
 static int coroutine_fn qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
                                            int64_t offset, int64_t len)
 {
@@ -4857,6 +4896,9 @@  static int coroutine_fn qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
     case BLK_ZO_OFFLINE:
         ret = qcow2_write_wp_at(bs, &wps->wp[index], index, BLK_ZO_OFFLINE);
         break;
+    case BLK_ZO_SET_ZDED:
+        ret = qcow2_zns_set_zded(bs, index);
+        break;
     default:
         error_report("Unsupported zone op: 0x%x", op);
         ret = -ENOTSUP;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 3932b516ed..fcd774e3f7 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -3425,11 +3425,6 @@  static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
     NvmeNamespace *ns = req->ns;
     NvmeZoneMgmtAIOCB *iocb;
     uint64_t slba = 0;
-    uint64_t offset;
-    BlockBackend *blk = ns->blkconf.blk;
-    uint32_t zone_size = blk_get_zone_size(blk);
-    uint64_t size = zone_size * blk_get_nr_zones(blk);
-    int64_t len;
     uint32_t zone_idx = 0;
     uint16_t status;
     uint8_t action = cmd->zsa;
@@ -3485,6 +3480,7 @@  static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
         break;
 
     case NVME_ZONE_ACTION_SET_ZD_EXT:
+        op = BLK_ZO_SET_ZDED;
         int zd_ext_size = blk_get_zd_ext_size(blk);
         trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
         if (all || !zd_ext_size) {
diff --git a/include/block/block-common.h b/include/block/block-common.h
index 0cbed607a8..b369e77607 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -84,6 +84,7 @@  typedef enum BlockZoneOp {
     BLK_ZO_FINISH,
     BLK_ZO_RESET,
     BLK_ZO_OFFLINE,
+    BLK_ZO_SET_ZDED,
 } BlockZoneOp;
 
 typedef enum BlockZoneModel {