diff mbox series

[v2,11/18] hw/block/nvme: Introduce max active and open zone limits

Message ID 20200617213415.22417-12-dmitry.fomichev@wdc.com (mailing list archive)
State New, archived
Headers show
Series hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set | expand

Commit Message

Dmitry Fomichev June 17, 2020, 9:34 p.m. UTC
Added two module properties, "max_active" and "max_open" to control
the maximum number of zones that can be active or open. Once these
variables are set to non-default values, the driver checks these
limits during I/O and returns Too Many Active or Too Many Open
command status if they are exceeded.

Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
---
 hw/block/nvme.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++-
 hw/block/nvme.h |   4 ++
 2 files changed, 185 insertions(+), 2 deletions(-)

Comments

Alistair Francis July 1, 2020, 12:26 a.m. UTC | #1
On Wed, Jun 17, 2020 at 3:07 PM Dmitry Fomichev <dmitry.fomichev@wdc.com> wrote:
>
> Added two module properties, "max_active" and "max_open" to control
> the maximum number of zones that can be active or open. Once these
> variables are set to non-default values, the driver checks these
> limits during I/O and returns Too Many Active or Too Many Open
> command status if they are exceeded.
>
> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> ---
>  hw/block/nvme.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/block/nvme.h |   4 ++
>  2 files changed, 185 insertions(+), 2 deletions(-)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 2e03b0b6ed..05a7cbcfcc 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -120,6 +120,87 @@ static void nvme_remove_zone(NvmeCtrl *n, NvmeNamespace *ns, NvmeZoneList *zl,
>      zone->prev = zone->next = 0;
>  }
>
> +/*
> + * Take the first zone out from a list, return NULL if the list is empty.
> + */
> +static NvmeZone *nvme_remove_zone_head(NvmeCtrl *n, NvmeNamespace *ns,
> +    NvmeZoneList *zl)
> +{
> +    NvmeZone *zone = nvme_peek_zone_head(ns, zl);
> +
> +    if (zone) {
> +        --zl->size;
> +        if (zl->size == 0) {
> +            zl->head = NVME_ZONE_LIST_NIL;
> +            zl->tail = NVME_ZONE_LIST_NIL;
> +        } else {
> +            zl->head = zone->next;
> +            ns->zone_array[zl->head].prev = NVME_ZONE_LIST_NIL;
> +        }
> +        zone->prev = zone->next = 0;
> +    }
> +
> +    return zone;
> +}
> +
> +/*
> + * Check if we can open a zone without exceeding open/active limits.
> + * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
> + */
> +static int nvme_aor_check(NvmeCtrl *n, NvmeNamespace *ns,
> +     uint32_t act, uint32_t opn)
> +{
> +    if (n->params.max_active_zones != 0 &&
> +        ns->nr_active_zones + act > n->params.max_active_zones) {
> +        trace_pci_nvme_err_insuff_active_res(n->params.max_active_zones);
> +        return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
> +    }
> +    if (n->params.max_open_zones != 0 &&
> +        ns->nr_open_zones + opn > n->params.max_open_zones) {
> +        trace_pci_nvme_err_insuff_open_res(n->params.max_open_zones);
> +        return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
> +    }
> +
> +    return NVME_SUCCESS;
> +}
> +
> +static inline void nvme_aor_inc_open(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    assert(ns->nr_open_zones >= 0);
> +    if (n->params.max_open_zones) {
> +        ns->nr_open_zones++;
> +        assert(ns->nr_open_zones <= n->params.max_open_zones);
> +    }
> +}
> +
> +static inline void nvme_aor_dec_open(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    if (n->params.max_open_zones) {
> +        assert(ns->nr_open_zones > 0);
> +        ns->nr_open_zones--;
> +    }
> +    assert(ns->nr_open_zones >= 0);
> +}
> +
> +static inline void nvme_aor_inc_active(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    assert(ns->nr_active_zones >= 0);
> +    if (n->params.max_active_zones) {
> +        ns->nr_active_zones++;
> +        assert(ns->nr_active_zones <= n->params.max_active_zones);
> +    }
> +}
> +
> +static inline void nvme_aor_dec_active(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    if (n->params.max_active_zones) {
> +        assert(ns->nr_active_zones > 0);
> +        ns->nr_active_zones--;
> +        assert(ns->nr_active_zones >= ns->nr_open_zones);
> +    }
> +    assert(ns->nr_active_zones >= 0);
> +}
> +
>  static void nvme_assign_zone_state(NvmeCtrl *n, NvmeNamespace *ns,
>      NvmeZone *zone, uint8_t state)
>  {
> @@ -454,6 +535,24 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
>      timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
>  }
>
> +static void nvme_auto_transition_zone(NvmeCtrl *n, NvmeNamespace *ns,
> +    bool implicit, bool adding_active)
> +{
> +    NvmeZone *zone;
> +
> +    if (implicit && n->params.max_open_zones &&
> +        ns->nr_open_zones == n->params.max_open_zones) {
> +        zone = nvme_remove_zone_head(n, ns, ns->imp_open_zones);
> +        if (zone) {
> +            /*
> +             * Automatically close this implicitly open zone.
> +             */
> +            nvme_aor_dec_open(n, ns);
> +            nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_CLOSED);
> +        }
> +    }
> +}
> +
>  static uint16_t nvme_check_zone_write(NvmeZone *zone, uint64_t slba,
>      uint32_t nlb)
>  {
> @@ -531,6 +630,23 @@ static uint16_t nvme_check_zone_read(NvmeCtrl *n, NvmeZone *zone, uint64_t slba,
>      return status;
>  }
>
> +static uint16_t nvme_auto_open_zone(NvmeCtrl *n, NvmeNamespace *ns,
> +    NvmeZone *zone)
> +{
> +    uint16_t status = NVME_SUCCESS;
> +    uint8_t zs = nvme_get_zone_state(zone);
> +
> +    if (zs == NVME_ZONE_STATE_EMPTY) {
> +        nvme_auto_transition_zone(n, ns, true, true);
> +        status = nvme_aor_check(n, ns, 1, 1);
> +    } else if (zs == NVME_ZONE_STATE_CLOSED) {
> +        nvme_auto_transition_zone(n, ns, true, false);
> +        status = nvme_aor_check(n, ns, 0, 1);
> +    }
> +
> +    return status;
> +}
> +
>  static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>      NvmeZone *zone, uint32_t nlb)
>  {
> @@ -543,7 +659,11 @@ static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>          switch (zs) {
>          case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>          case NVME_ZONE_STATE_EXPLICITLY_OPEN:
> +            nvme_aor_dec_open(n, ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_CLOSED:
> +            nvme_aor_dec_active(n, ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_EMPTY:
>              break;
>          default:
> @@ -553,7 +673,10 @@ static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>      } else {
>          switch (zs) {
>          case NVME_ZONE_STATE_EMPTY:
> +            nvme_aor_inc_active(n, ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_CLOSED:
> +            nvme_aor_inc_open(n, ns);
>              nvme_assign_zone_state(n, ns, zone,
>                                     NVME_ZONE_STATE_IMPLICITLY_OPEN);
>          }
> @@ -636,6 +759,11 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>                                                 zone->d.wp);
>              return NVME_ZONE_INVALID_WRITE | NVME_DNR;
>          }
> +
> +        status = nvme_auto_open_zone(n, ns, zone);
> +        if (status != NVME_SUCCESS) {
> +            return status;
> +        }
>      }
>
>      block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
> @@ -709,6 +837,11 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>                                                     zone->d.wp);
>                  return NVME_ZONE_INVALID_WRITE | NVME_DNR;
>              }
> +
> +            status = nvme_auto_open_zone(n, ns, zone);
> +            if (status != NVME_SUCCESS) {
> +                return status;
> +            }
>          } else {
>              status = nvme_check_zone_read(n, zone, slba, nlb,
>                                            n->params.cross_zone_read);
> @@ -804,9 +937,27 @@ static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeCtrl *n, NvmeNamespace *ns,
>  static uint16_t nvme_open_zone(NvmeCtrl *n, NvmeNamespace *ns,
>      NvmeZone *zone, uint8_t state)
>  {
> +    uint16_t status;
> +
>      switch (state) {
>      case NVME_ZONE_STATE_EMPTY:
> +        nvme_auto_transition_zone(n, ns, false, true);
> +        status = nvme_aor_check(n, ns, 1, 0);
> +        if (status != NVME_SUCCESS) {
> +            return status;
> +        }
> +        nvme_aor_inc_active(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        status = nvme_aor_check(n, ns, 0, 1);
> +        if (status != NVME_SUCCESS) {
> +            if (state == NVME_ZONE_STATE_EMPTY) {
> +                nvme_aor_dec_active(n, ns);
> +            }
> +            return status;
> +        }
> +        nvme_aor_inc_open(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>          nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
>          /* fall through */
> @@ -828,6 +979,7 @@ static uint16_t nvme_close_zone(NvmeCtrl *n,  NvmeNamespace *ns,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(n, ns);
>          nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_CLOSED);
>          /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> @@ -849,7 +1001,11 @@ static uint16_t nvme_finish_zone(NvmeCtrl *n, NvmeNamespace *ns,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        nvme_aor_dec_active(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_EMPTY:
>          zone->d.wp = nvme_zone_wr_boundary(zone);
>          nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_FULL);
> @@ -874,7 +1030,11 @@ static uint16_t nvme_reset_zone(NvmeCtrl *n, NvmeNamespace *ns,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        nvme_aor_dec_active(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_FULL:
>          zone->d.wp = zone->d.zslba;
>          nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_EMPTY);
> @@ -2412,6 +2572,15 @@ static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
>      uint64_t zone_size = 0, capacity;
>      uint32_t nz;
>
> +    if (n->params.max_open_zones < 0) {
> +        error_setg(errp, "invalid max_open_zones value");
> +        return;
> +    }
> +    if (n->params.max_active_zones < 0) {
> +        error_setg(errp, "invalid max_active_zones value");
> +        return;
> +    }
> +
>      if (n->params.zone_size) {
>          zone_size = n->params.zone_size;
>      } else {
> @@ -2435,6 +2604,14 @@ static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
>      n->num_zones = nz;
>      n->zone_array_size = sizeof(NvmeZone) * nz;
>
> +    /* Make sure that the values of all Zoned Command Set properties are sane */
> +    if (n->params.max_open_zones > nz) {
> +        n->params.max_open_zones = nz;
> +    }
> +    if (n->params.max_active_zones > nz) {
> +        n->params.max_active_zones = nz;
> +    }

Should there be some warning here? You are overwriting the property
that was set by the board, so it seems like you should tell someone.

Alistair

> +
>      return;
>  }
>
> @@ -2452,8 +2629,8 @@ static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
>      ns->id_ns_zoned = g_malloc0(sizeof(*ns->id_ns_zoned));
>
>      /* MAR/MOR are zeroes-based, 0xffffffff means no limit */
> -    ns->id_ns_zoned->mar = 0xffffffff;
> -    ns->id_ns_zoned->mor = 0xffffffff;
> +    ns->id_ns_zoned->mar = cpu_to_le32(n->params.max_active_zones - 1);
> +    ns->id_ns_zoned->mor = cpu_to_le32(n->params.max_open_zones - 1);
>      ns->id_ns_zoned->zoc = 0;
>      ns->id_ns_zoned->ozcs = n->params.cross_zone_read ? 0x01 : 0x00;
>
> @@ -2813,6 +2990,8 @@ static Property nvme_props[] = {
>      DEFINE_PROP_UINT64("zone_size", NvmeCtrl, params.zone_size, 512),
>      DEFINE_PROP_UINT64("zone_capacity", NvmeCtrl, params.zone_capacity, 512),
>      DEFINE_PROP_UINT32("zone_append_max_size", NvmeCtrl, params.zamds_bs, 0),
> +    DEFINE_PROP_INT32("max_active", NvmeCtrl, params.max_active_zones, 0),
> +    DEFINE_PROP_INT32("max_open", NvmeCtrl, params.max_open_zones, 0),
>      DEFINE_PROP_BOOL("cross_zone_read", NvmeCtrl, params.cross_zone_read, true),
>      DEFINE_PROP_UINT8("fill_pattern", NvmeCtrl, params.fill_pattern, 0),
>      DEFINE_PROP_END_OF_LIST(),
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 2c932b5e29..f5a4679702 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -19,6 +19,8 @@ typedef struct NvmeParams {
>      uint32_t    zamds_bs;
>      uint64_t    zone_size;
>      uint64_t    zone_capacity;
> +    int32_t     max_active_zones;
> +    int32_t     max_open_zones;
>  } NvmeParams;
>
>  typedef struct NvmeAsyncEvent {
> @@ -103,6 +105,8 @@ typedef struct NvmeNamespace {
>      NvmeZoneList    *imp_open_zones;
>      NvmeZoneList    *closed_zones;
>      NvmeZoneList    *full_zones;
> +    int32_t         nr_open_zones;
> +    int32_t         nr_active_zones;
>  } NvmeNamespace;
>
>  static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
> --
> 2.21.0
>
>
Klaus Jensen July 1, 2020, 6:41 a.m. UTC | #2
On Jun 18 06:34, Dmitry Fomichev wrote:
> Added two module properties, "max_active" and "max_open" to control
> the maximum number of zones that can be active or open. Once these
> variables are set to non-default values, the driver checks these
> limits during I/O and returns Too Many Active or Too Many Open
> command status if they are exceeded.
> 
> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> ---
>  hw/block/nvme.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/block/nvme.h |   4 ++
>  2 files changed, 185 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 2e03b0b6ed..05a7cbcfcc 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -120,6 +120,87 @@ static void nvme_remove_zone(NvmeCtrl *n, NvmeNamespace *ns, NvmeZoneList *zl,
>      zone->prev = zone->next = 0;
>  }
>  
> +/*
> + * Take the first zone out from a list, return NULL if the list is empty.
> + */
> +static NvmeZone *nvme_remove_zone_head(NvmeCtrl *n, NvmeNamespace *ns,
> +    NvmeZoneList *zl)
> +{
> +    NvmeZone *zone = nvme_peek_zone_head(ns, zl);
> +
> +    if (zone) {
> +        --zl->size;
> +        if (zl->size == 0) {
> +            zl->head = NVME_ZONE_LIST_NIL;
> +            zl->tail = NVME_ZONE_LIST_NIL;
> +        } else {
> +            zl->head = zone->next;
> +            ns->zone_array[zl->head].prev = NVME_ZONE_LIST_NIL;
> +        }
> +        zone->prev = zone->next = 0;
> +    }
> +
> +    return zone;
> +}
> +
> +/*
> + * Check if we can open a zone without exceeding open/active limits.
> + * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
> + */
> +static int nvme_aor_check(NvmeCtrl *n, NvmeNamespace *ns,
> +     uint32_t act, uint32_t opn)
> +{
> +    if (n->params.max_active_zones != 0 &&
> +        ns->nr_active_zones + act > n->params.max_active_zones) {
> +        trace_pci_nvme_err_insuff_active_res(n->params.max_active_zones);
> +        return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
> +    }
> +    if (n->params.max_open_zones != 0 &&
> +        ns->nr_open_zones + opn > n->params.max_open_zones) {
> +        trace_pci_nvme_err_insuff_open_res(n->params.max_open_zones);
> +        return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
> +    }
> +
> +    return NVME_SUCCESS;
> +}
> +
> +static inline void nvme_aor_inc_open(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    assert(ns->nr_open_zones >= 0);
> +    if (n->params.max_open_zones) {
> +        ns->nr_open_zones++;
> +        assert(ns->nr_open_zones <= n->params.max_open_zones);
> +    }
> +}
> +
> +static inline void nvme_aor_dec_open(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    if (n->params.max_open_zones) {
> +        assert(ns->nr_open_zones > 0);
> +        ns->nr_open_zones--;
> +    }
> +    assert(ns->nr_open_zones >= 0);
> +}
> +
> +static inline void nvme_aor_inc_active(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    assert(ns->nr_active_zones >= 0);
> +    if (n->params.max_active_zones) {
> +        ns->nr_active_zones++;
> +        assert(ns->nr_active_zones <= n->params.max_active_zones);
> +    }
> +}
> +
> +static inline void nvme_aor_dec_active(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    if (n->params.max_active_zones) {
> +        assert(ns->nr_active_zones > 0);
> +        ns->nr_active_zones--;
> +        assert(ns->nr_active_zones >= ns->nr_open_zones);
> +    }
> +    assert(ns->nr_active_zones >= 0);
> +}
> +
>  static void nvme_assign_zone_state(NvmeCtrl *n, NvmeNamespace *ns,
>      NvmeZone *zone, uint8_t state)
>  {
> @@ -454,6 +535,24 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
>      timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
>  }
>  
> +static void nvme_auto_transition_zone(NvmeCtrl *n, NvmeNamespace *ns,
> +    bool implicit, bool adding_active)
> +{
> +    NvmeZone *zone;
> +
> +    if (implicit && n->params.max_open_zones &&
> +        ns->nr_open_zones == n->params.max_open_zones) {
> +        zone = nvme_remove_zone_head(n, ns, ns->imp_open_zones);
> +        if (zone) {
> +            /*
> +             * Automatically close this implicitly open zone.
> +             */
> +            nvme_aor_dec_open(n, ns);
> +            nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_CLOSED);
> +        }
> +    }
> +}
> +
>  static uint16_t nvme_check_zone_write(NvmeZone *zone, uint64_t slba,
>      uint32_t nlb)
>  {
> @@ -531,6 +630,23 @@ static uint16_t nvme_check_zone_read(NvmeCtrl *n, NvmeZone *zone, uint64_t slba,
>      return status;
>  }
>  
> +static uint16_t nvme_auto_open_zone(NvmeCtrl *n, NvmeNamespace *ns,
> +    NvmeZone *zone)
> +{
> +    uint16_t status = NVME_SUCCESS;
> +    uint8_t zs = nvme_get_zone_state(zone);
> +
> +    if (zs == NVME_ZONE_STATE_EMPTY) {
> +        nvme_auto_transition_zone(n, ns, true, true);
> +        status = nvme_aor_check(n, ns, 1, 1);
> +    } else if (zs == NVME_ZONE_STATE_CLOSED) {
> +        nvme_auto_transition_zone(n, ns, true, false);
> +        status = nvme_aor_check(n, ns, 0, 1);
> +    }
> +
> +    return status;
> +}
> +
>  static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>      NvmeZone *zone, uint32_t nlb)
>  {
> @@ -543,7 +659,11 @@ static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>          switch (zs) {
>          case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>          case NVME_ZONE_STATE_EXPLICITLY_OPEN:
> +            nvme_aor_dec_open(n, ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_CLOSED:
> +            nvme_aor_dec_active(n, ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_EMPTY:
>              break;
>          default:
> @@ -553,7 +673,10 @@ static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>      } else {
>          switch (zs) {
>          case NVME_ZONE_STATE_EMPTY:
> +            nvme_aor_inc_active(n, ns);
> +            /* fall through */
>          case NVME_ZONE_STATE_CLOSED:
> +            nvme_aor_inc_open(n, ns);
>              nvme_assign_zone_state(n, ns, zone,
>                                     NVME_ZONE_STATE_IMPLICITLY_OPEN);
>          }
> @@ -636,6 +759,11 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>                                                 zone->d.wp);
>              return NVME_ZONE_INVALID_WRITE | NVME_DNR;
>          }
> +
> +        status = nvme_auto_open_zone(n, ns, zone);
> +        if (status != NVME_SUCCESS) {
> +            return status;
> +        }
>      }
>  
>      block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
> @@ -709,6 +837,11 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>                                                     zone->d.wp);
>                  return NVME_ZONE_INVALID_WRITE | NVME_DNR;
>              }
> +
> +            status = nvme_auto_open_zone(n, ns, zone);
> +            if (status != NVME_SUCCESS) {
> +                return status;
> +            }
>          } else {
>              status = nvme_check_zone_read(n, zone, slba, nlb,
>                                            n->params.cross_zone_read);
> @@ -804,9 +937,27 @@ static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeCtrl *n, NvmeNamespace *ns,
>  static uint16_t nvme_open_zone(NvmeCtrl *n, NvmeNamespace *ns,
>      NvmeZone *zone, uint8_t state)
>  {
> +    uint16_t status;
> +
>      switch (state) {
>      case NVME_ZONE_STATE_EMPTY:
> +        nvme_auto_transition_zone(n, ns, false, true);
> +        status = nvme_aor_check(n, ns, 1, 0);
> +        if (status != NVME_SUCCESS) {
> +            return status;
> +        }
> +        nvme_aor_inc_active(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        status = nvme_aor_check(n, ns, 0, 1);
> +        if (status != NVME_SUCCESS) {
> +            if (state == NVME_ZONE_STATE_EMPTY) {
> +                nvme_aor_dec_active(n, ns);
> +            }
> +            return status;
> +        }
> +        nvme_aor_inc_open(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>          nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
>          /* fall through */
> @@ -828,6 +979,7 @@ static uint16_t nvme_close_zone(NvmeCtrl *n,  NvmeNamespace *ns,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(n, ns);
>          nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_CLOSED);
>          /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> @@ -849,7 +1001,11 @@ static uint16_t nvme_finish_zone(NvmeCtrl *n, NvmeNamespace *ns,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        nvme_aor_dec_active(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_EMPTY:
>          zone->d.wp = nvme_zone_wr_boundary(zone);
>          nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_FULL);
> @@ -874,7 +1030,11 @@ static uint16_t nvme_reset_zone(NvmeCtrl *n, NvmeNamespace *ns,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        nvme_aor_dec_open(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_CLOSED:
> +        nvme_aor_dec_active(n, ns);
> +        /* fall through */
>      case NVME_ZONE_STATE_FULL:
>          zone->d.wp = zone->d.zslba;
>          nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_EMPTY);
> @@ -2412,6 +2572,15 @@ static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
>      uint64_t zone_size = 0, capacity;
>      uint32_t nz;
>  
> +    if (n->params.max_open_zones < 0) {
> +        error_setg(errp, "invalid max_open_zones value");
> +        return;
> +    }
> +    if (n->params.max_active_zones < 0) {
> +        error_setg(errp, "invalid max_active_zones value");
> +        return;
> +    }
> +
>      if (n->params.zone_size) {
>          zone_size = n->params.zone_size;
>      } else {
> @@ -2435,6 +2604,14 @@ static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
>      n->num_zones = nz;
>      n->zone_array_size = sizeof(NvmeZone) * nz;
>  
> +    /* Make sure that the values of all Zoned Command Set properties are sane */
> +    if (n->params.max_open_zones > nz) {
> +        n->params.max_open_zones = nz;
> +    }
> +    if (n->params.max_active_zones > nz) {
> +        n->params.max_active_zones = nz;
> +    }

As Alistair already pointed out, a warning would be nice.

> +
>      return;
>  }
>  
> @@ -2452,8 +2629,8 @@ static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
>      ns->id_ns_zoned = g_malloc0(sizeof(*ns->id_ns_zoned));
>  
>      /* MAR/MOR are zeroes-based, 0xffffffff means no limit */
> -    ns->id_ns_zoned->mar = 0xffffffff;
> -    ns->id_ns_zoned->mor = 0xffffffff;
> +    ns->id_ns_zoned->mar = cpu_to_le32(n->params.max_active_zones - 1);
> +    ns->id_ns_zoned->mor = cpu_to_le32(n->params.max_open_zones - 1);
>      ns->id_ns_zoned->zoc = 0;
>      ns->id_ns_zoned->ozcs = n->params.cross_zone_read ? 0x01 : 0x00;
>  
> @@ -2813,6 +2990,8 @@ static Property nvme_props[] = {
>      DEFINE_PROP_UINT64("zone_size", NvmeCtrl, params.zone_size, 512),
>      DEFINE_PROP_UINT64("zone_capacity", NvmeCtrl, params.zone_capacity, 512),
>      DEFINE_PROP_UINT32("zone_append_max_size", NvmeCtrl, params.zamds_bs, 0),
> +    DEFINE_PROP_INT32("max_active", NvmeCtrl, params.max_active_zones, 0),
> +    DEFINE_PROP_INT32("max_open", NvmeCtrl, params.max_open_zones, 0),

max_active and max_open should be unsigned. 0xfffffffe is a valid value
for MAR/MOR.

>      DEFINE_PROP_BOOL("cross_zone_read", NvmeCtrl, params.cross_zone_read, true),
>      DEFINE_PROP_UINT8("fill_pattern", NvmeCtrl, params.fill_pattern, 0),
>      DEFINE_PROP_END_OF_LIST(),
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 2c932b5e29..f5a4679702 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -19,6 +19,8 @@ typedef struct NvmeParams {
>      uint32_t    zamds_bs;
>      uint64_t    zone_size;
>      uint64_t    zone_capacity;
> +    int32_t     max_active_zones;
> +    int32_t     max_open_zones;
>  } NvmeParams;
>  
>  typedef struct NvmeAsyncEvent {
> @@ -103,6 +105,8 @@ typedef struct NvmeNamespace {
>      NvmeZoneList    *imp_open_zones;
>      NvmeZoneList    *closed_zones;
>      NvmeZoneList    *full_zones;
> +    int32_t         nr_open_zones;
> +    int32_t         nr_active_zones;
>  } NvmeNamespace;
>  
>  static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
> -- 
> 2.21.0
> 
>
diff mbox series

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 2e03b0b6ed..05a7cbcfcc 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -120,6 +120,87 @@  static void nvme_remove_zone(NvmeCtrl *n, NvmeNamespace *ns, NvmeZoneList *zl,
     zone->prev = zone->next = 0;
 }
 
+/*
+ * Take the first zone out from a list, return NULL if the list is empty.
+ */
+static NvmeZone *nvme_remove_zone_head(NvmeCtrl *n, NvmeNamespace *ns,
+    NvmeZoneList *zl)
+{
+    NvmeZone *zone = nvme_peek_zone_head(ns, zl);
+
+    if (zone) {
+        --zl->size;
+        if (zl->size == 0) {
+            zl->head = NVME_ZONE_LIST_NIL;
+            zl->tail = NVME_ZONE_LIST_NIL;
+        } else {
+            zl->head = zone->next;
+            ns->zone_array[zl->head].prev = NVME_ZONE_LIST_NIL;
+        }
+        zone->prev = zone->next = 0;
+    }
+
+    return zone;
+}
+
+/*
+ * Check if we can open a zone without exceeding open/active limits.
+ * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
+ */
+static int nvme_aor_check(NvmeCtrl *n, NvmeNamespace *ns,
+     uint32_t act, uint32_t opn)
+{
+    if (n->params.max_active_zones != 0 &&
+        ns->nr_active_zones + act > n->params.max_active_zones) {
+        trace_pci_nvme_err_insuff_active_res(n->params.max_active_zones);
+        return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
+    }
+    if (n->params.max_open_zones != 0 &&
+        ns->nr_open_zones + opn > n->params.max_open_zones) {
+        trace_pci_nvme_err_insuff_open_res(n->params.max_open_zones);
+        return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
+    }
+
+    return NVME_SUCCESS;
+}
+
+static inline void nvme_aor_inc_open(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    assert(ns->nr_open_zones >= 0);
+    if (n->params.max_open_zones) {
+        ns->nr_open_zones++;
+        assert(ns->nr_open_zones <= n->params.max_open_zones);
+    }
+}
+
+static inline void nvme_aor_dec_open(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    if (n->params.max_open_zones) {
+        assert(ns->nr_open_zones > 0);
+        ns->nr_open_zones--;
+    }
+    assert(ns->nr_open_zones >= 0);
+}
+
+static inline void nvme_aor_inc_active(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    assert(ns->nr_active_zones >= 0);
+    if (n->params.max_active_zones) {
+        ns->nr_active_zones++;
+        assert(ns->nr_active_zones <= n->params.max_active_zones);
+    }
+}
+
+static inline void nvme_aor_dec_active(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    if (n->params.max_active_zones) {
+        assert(ns->nr_active_zones > 0);
+        ns->nr_active_zones--;
+        assert(ns->nr_active_zones >= ns->nr_open_zones);
+    }
+    assert(ns->nr_active_zones >= 0);
+}
+
 static void nvme_assign_zone_state(NvmeCtrl *n, NvmeNamespace *ns,
     NvmeZone *zone, uint8_t state)
 {
@@ -454,6 +535,24 @@  static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
     timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
 }
 
+static void nvme_auto_transition_zone(NvmeCtrl *n, NvmeNamespace *ns,
+    bool implicit, bool adding_active)
+{
+    NvmeZone *zone;
+
+    if (implicit && n->params.max_open_zones &&
+        ns->nr_open_zones == n->params.max_open_zones) {
+        zone = nvme_remove_zone_head(n, ns, ns->imp_open_zones);
+        if (zone) {
+            /*
+             * Automatically close this implicitly open zone.
+             */
+            nvme_aor_dec_open(n, ns);
+            nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_CLOSED);
+        }
+    }
+}
+
 static uint16_t nvme_check_zone_write(NvmeZone *zone, uint64_t slba,
     uint32_t nlb)
 {
@@ -531,6 +630,23 @@  static uint16_t nvme_check_zone_read(NvmeCtrl *n, NvmeZone *zone, uint64_t slba,
     return status;
 }
 
+static uint16_t nvme_auto_open_zone(NvmeCtrl *n, NvmeNamespace *ns,
+    NvmeZone *zone)
+{
+    uint16_t status = NVME_SUCCESS;
+    uint8_t zs = nvme_get_zone_state(zone);
+
+    if (zs == NVME_ZONE_STATE_EMPTY) {
+        nvme_auto_transition_zone(n, ns, true, true);
+        status = nvme_aor_check(n, ns, 1, 1);
+    } else if (zs == NVME_ZONE_STATE_CLOSED) {
+        nvme_auto_transition_zone(n, ns, true, false);
+        status = nvme_aor_check(n, ns, 0, 1);
+    }
+
+    return status;
+}
+
 static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
     NvmeZone *zone, uint32_t nlb)
 {
@@ -543,7 +659,11 @@  static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
         switch (zs) {
         case NVME_ZONE_STATE_IMPLICITLY_OPEN:
         case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+            nvme_aor_dec_open(n, ns);
+            /* fall through */
         case NVME_ZONE_STATE_CLOSED:
+            nvme_aor_dec_active(n, ns);
+            /* fall through */
         case NVME_ZONE_STATE_EMPTY:
             break;
         default:
@@ -553,7 +673,10 @@  static uint64_t nvme_finalize_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
     } else {
         switch (zs) {
         case NVME_ZONE_STATE_EMPTY:
+            nvme_aor_inc_active(n, ns);
+            /* fall through */
         case NVME_ZONE_STATE_CLOSED:
+            nvme_aor_inc_open(n, ns);
             nvme_assign_zone_state(n, ns, zone,
                                    NVME_ZONE_STATE_IMPLICITLY_OPEN);
         }
@@ -636,6 +759,11 @@  static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
                                                zone->d.wp);
             return NVME_ZONE_INVALID_WRITE | NVME_DNR;
         }
+
+        status = nvme_auto_open_zone(n, ns, zone);
+        if (status != NVME_SUCCESS) {
+            return status;
+        }
     }
 
     block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
@@ -709,6 +837,11 @@  static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
                                                    zone->d.wp);
                 return NVME_ZONE_INVALID_WRITE | NVME_DNR;
             }
+
+            status = nvme_auto_open_zone(n, ns, zone);
+            if (status != NVME_SUCCESS) {
+                return status;
+            }
         } else {
             status = nvme_check_zone_read(n, zone, slba, nlb,
                                           n->params.cross_zone_read);
@@ -804,9 +937,27 @@  static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeCtrl *n, NvmeNamespace *ns,
 static uint16_t nvme_open_zone(NvmeCtrl *n, NvmeNamespace *ns,
     NvmeZone *zone, uint8_t state)
 {
+    uint16_t status;
+
     switch (state) {
     case NVME_ZONE_STATE_EMPTY:
+        nvme_auto_transition_zone(n, ns, false, true);
+        status = nvme_aor_check(n, ns, 1, 0);
+        if (status != NVME_SUCCESS) {
+            return status;
+        }
+        nvme_aor_inc_active(n, ns);
+        /* fall through */
     case NVME_ZONE_STATE_CLOSED:
+        status = nvme_aor_check(n, ns, 0, 1);
+        if (status != NVME_SUCCESS) {
+            if (state == NVME_ZONE_STATE_EMPTY) {
+                nvme_aor_dec_active(n, ns);
+            }
+            return status;
+        }
+        nvme_aor_inc_open(n, ns);
+        /* fall through */
     case NVME_ZONE_STATE_IMPLICITLY_OPEN:
         nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
         /* fall through */
@@ -828,6 +979,7 @@  static uint16_t nvme_close_zone(NvmeCtrl *n,  NvmeNamespace *ns,
     switch (state) {
     case NVME_ZONE_STATE_EXPLICITLY_OPEN:
     case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+        nvme_aor_dec_open(n, ns);
         nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_CLOSED);
         /* fall through */
     case NVME_ZONE_STATE_CLOSED:
@@ -849,7 +1001,11 @@  static uint16_t nvme_finish_zone(NvmeCtrl *n, NvmeNamespace *ns,
     switch (state) {
     case NVME_ZONE_STATE_EXPLICITLY_OPEN:
     case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+        nvme_aor_dec_open(n, ns);
+        /* fall through */
     case NVME_ZONE_STATE_CLOSED:
+        nvme_aor_dec_active(n, ns);
+        /* fall through */
     case NVME_ZONE_STATE_EMPTY:
         zone->d.wp = nvme_zone_wr_boundary(zone);
         nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_FULL);
@@ -874,7 +1030,11 @@  static uint16_t nvme_reset_zone(NvmeCtrl *n, NvmeNamespace *ns,
     switch (state) {
     case NVME_ZONE_STATE_EXPLICITLY_OPEN:
     case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+        nvme_aor_dec_open(n, ns);
+        /* fall through */
     case NVME_ZONE_STATE_CLOSED:
+        nvme_aor_dec_active(n, ns);
+        /* fall through */
     case NVME_ZONE_STATE_FULL:
         zone->d.wp = zone->d.zslba;
         nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_EMPTY);
@@ -2412,6 +2572,15 @@  static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
     uint64_t zone_size = 0, capacity;
     uint32_t nz;
 
+    if (n->params.max_open_zones < 0) {
+        error_setg(errp, "invalid max_open_zones value");
+        return;
+    }
+    if (n->params.max_active_zones < 0) {
+        error_setg(errp, "invalid max_active_zones value");
+        return;
+    }
+
     if (n->params.zone_size) {
         zone_size = n->params.zone_size;
     } else {
@@ -2435,6 +2604,14 @@  static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
     n->num_zones = nz;
     n->zone_array_size = sizeof(NvmeZone) * nz;
 
+    /* Make sure that the values of all Zoned Command Set properties are sane */
+    if (n->params.max_open_zones > nz) {
+        n->params.max_open_zones = nz;
+    }
+    if (n->params.max_active_zones > nz) {
+        n->params.max_active_zones = nz;
+    }
+
     return;
 }
 
@@ -2452,8 +2629,8 @@  static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
     ns->id_ns_zoned = g_malloc0(sizeof(*ns->id_ns_zoned));
 
     /* MAR/MOR are zeroes-based, 0xffffffff means no limit */
-    ns->id_ns_zoned->mar = 0xffffffff;
-    ns->id_ns_zoned->mor = 0xffffffff;
+    ns->id_ns_zoned->mar = cpu_to_le32(n->params.max_active_zones - 1);
+    ns->id_ns_zoned->mor = cpu_to_le32(n->params.max_open_zones - 1);
     ns->id_ns_zoned->zoc = 0;
     ns->id_ns_zoned->ozcs = n->params.cross_zone_read ? 0x01 : 0x00;
 
@@ -2813,6 +2990,8 @@  static Property nvme_props[] = {
     DEFINE_PROP_UINT64("zone_size", NvmeCtrl, params.zone_size, 512),
     DEFINE_PROP_UINT64("zone_capacity", NvmeCtrl, params.zone_capacity, 512),
     DEFINE_PROP_UINT32("zone_append_max_size", NvmeCtrl, params.zamds_bs, 0),
+    DEFINE_PROP_INT32("max_active", NvmeCtrl, params.max_active_zones, 0),
+    DEFINE_PROP_INT32("max_open", NvmeCtrl, params.max_open_zones, 0),
     DEFINE_PROP_BOOL("cross_zone_read", NvmeCtrl, params.cross_zone_read, true),
     DEFINE_PROP_UINT8("fill_pattern", NvmeCtrl, params.fill_pattern, 0),
     DEFINE_PROP_END_OF_LIST(),
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 2c932b5e29..f5a4679702 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -19,6 +19,8 @@  typedef struct NvmeParams {
     uint32_t    zamds_bs;
     uint64_t    zone_size;
     uint64_t    zone_capacity;
+    int32_t     max_active_zones;
+    int32_t     max_open_zones;
 } NvmeParams;
 
 typedef struct NvmeAsyncEvent {
@@ -103,6 +105,8 @@  typedef struct NvmeNamespace {
     NvmeZoneList    *imp_open_zones;
     NvmeZoneList    *closed_zones;
     NvmeZoneList    *full_zones;
+    int32_t         nr_open_zones;
+    int32_t         nr_active_zones;
 } NvmeNamespace;
 
 static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)