Message ID | 20200617213415.22417-14-dmitry.fomichev@wdc.com (mailing list archive)
---|---
State | New, archived
Series | hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set
On Jun 18 06:34, Dmitry Fomichev wrote:
> Added logic to set and reset the FZR and RZR zone attributes. Four new
> driver properties are added to control the timing of setting and
> resetting these attributes. The FZR/RZR delay lasts from the zone
> operation until the corresponding zone attribute is set. The FZR/RZR
> limits set the time period between setting the FZR or RZR attribute
> and resetting it, simulating the internal controller action on that
> zone.
>
> Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>

Please correct me if I am wrong here, but I want to raise a question
about the use of QEMU_CLOCK_REALTIME here. I agree that it makes sense
that the limits are "absolute", but does this hold for emulation? In my
view, when emulation is stopped, the world is stopped. Should we emulate
the need for background operations in this case? I don't think so.
(A sketch of what I mean follows the quoted patch.)

> ---
>  hw/block/nvme.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/block/nvme.h | 13 ++++++-
>  2 files changed, 111 insertions(+), 1 deletion(-)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index a29cbfcc96..c3898448c7 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -201,6 +201,84 @@ static inline void nvme_aor_dec_active(NvmeCtrl *n, NvmeNamespace *ns)
>      assert(ns->nr_active_zones >= 0);
>  }
>
> +static void nvme_set_rzr(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
> +{
> +    assert(zone->flags & NVME_ZFLAGS_SET_RZR);
> +    zone->tstamp = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> +    zone->flags &= ~NVME_ZFLAGS_TS_DELAY;
> +    zone->d.za |= NVME_ZA_RESET_RECOMMENDED;
> +    zone->flags &= ~NVME_ZFLAGS_SET_RZR;
> +    trace_pci_nvme_zone_reset_recommended(zone->d.zslba);
> +}
> +
> +static void nvme_clear_rzr(NvmeCtrl *n, NvmeNamespace *ns,
> +                           NvmeZone *zone, bool notify)
> +{
> +    if (n->params.rrl_usec) {
> +        zone->flags &= ~(NVME_ZFLAGS_SET_RZR | NVME_ZFLAGS_TS_DELAY);
> +        notify = notify && (zone->d.za & NVME_ZA_RESET_RECOMMENDED);
> +        zone->d.za &= ~NVME_ZA_RESET_RECOMMENDED;
> +        zone->tstamp = 0;
> +    }
> +}
> +
> +static void nvme_set_fzr(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
> +{
> +    assert(zone->flags & NVME_ZFLAGS_SET_FZR);
> +    zone->tstamp = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> +    zone->flags &= ~NVME_ZFLAGS_TS_DELAY;
> +    zone->d.za |= NVME_ZA_FINISH_RECOMMENDED;
> +    zone->flags &= ~NVME_ZFLAGS_SET_FZR;
> +    trace_pci_nvme_zone_finish_recommended(zone->d.zslba);
> +}
> +
> +static void nvme_clear_fzr(NvmeCtrl *n, NvmeNamespace *ns,
> +                           NvmeZone *zone, bool notify)
> +{
> +    if (n->params.frl_usec) {
> +        zone->flags &= ~(NVME_ZFLAGS_SET_FZR | NVME_ZFLAGS_TS_DELAY);
> +        notify = notify && (zone->d.za & NVME_ZA_FINISH_RECOMMENDED);
> +        zone->d.za &= ~NVME_ZA_FINISH_RECOMMENDED;
> +        zone->tstamp = 0;
> +    }
> +}
> +
> +static void nvme_schedule_rzr(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
> +{
> +    if (n->params.frl_usec) {
> +        zone->flags &= ~(NVME_ZFLAGS_SET_FZR | NVME_ZFLAGS_TS_DELAY);
> +        zone->d.za &= ~NVME_ZA_FINISH_RECOMMENDED;
> +        zone->tstamp = 0;
> +    }
> +    if (n->params.rrl_usec) {
> +        zone->flags |= NVME_ZFLAGS_SET_RZR;
> +        if (n->params.rzr_delay_usec) {
> +            zone->tstamp = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> +            zone->flags |= NVME_ZFLAGS_TS_DELAY;
> +        } else {
> +            nvme_set_rzr(n, ns, zone);
> +        }
> +    }
> +}
> +
> +static void nvme_schedule_fzr(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
> +{
> +    if (n->params.rrl_usec) {
> +        zone->flags &= ~(NVME_ZFLAGS_SET_RZR | NVME_ZFLAGS_TS_DELAY);
> +        zone->d.za &= ~NVME_ZA_RESET_RECOMMENDED;
> +        zone->tstamp = 0;
> +    }
> +    if (n->params.frl_usec) {
> +        zone->flags |= NVME_ZFLAGS_SET_FZR;
> +        if (n->params.fzr_delay_usec) {
> +            zone->tstamp = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> +            zone->flags |= NVME_ZFLAGS_TS_DELAY;
> +        } else {
> +            nvme_set_fzr(n, ns, zone);
> +        }
> +    }
> +}
> +
>  static void nvme_assign_zone_state(NvmeCtrl *n, NvmeNamespace *ns,
>                                     NvmeZone *zone, uint8_t state)
>  {
> @@ -208,15 +286,19 @@ static void nvme_assign_zone_state(NvmeCtrl *n, NvmeNamespace *ns,
>      switch (nvme_get_zone_state(zone)) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>          nvme_remove_zone(n, ns, ns->exp_open_zones, zone);
> +        nvme_clear_fzr(n, ns, zone, false);
>          break;
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>          nvme_remove_zone(n, ns, ns->imp_open_zones, zone);
> +        nvme_clear_fzr(n, ns, zone, false);
>          break;
>      case NVME_ZONE_STATE_CLOSED:
>          nvme_remove_zone(n, ns, ns->closed_zones, zone);
> +        nvme_clear_fzr(n, ns, zone, false);
>          break;
>      case NVME_ZONE_STATE_FULL:
>          nvme_remove_zone(n, ns, ns->full_zones, zone);
> +        nvme_clear_rzr(n, ns, zone, false);
>      }
>  }
>
> @@ -225,15 +307,19 @@ static void nvme_assign_zone_state(NvmeCtrl *n, NvmeNamespace *ns,
>      switch (state) {
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>          nvme_add_zone_tail(n, ns, ns->exp_open_zones, zone);
> +        nvme_schedule_fzr(n, ns, zone);
>          break;
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>          nvme_add_zone_tail(n, ns, ns->imp_open_zones, zone);
> +        nvme_schedule_fzr(n, ns, zone);
>          break;
>      case NVME_ZONE_STATE_CLOSED:
>          nvme_add_zone_tail(n, ns, ns->closed_zones, zone);
> +        nvme_schedule_fzr(n, ns, zone);
>          break;
>      case NVME_ZONE_STATE_FULL:
>          nvme_add_zone_tail(n, ns, ns->full_zones, zone);
> +        nvme_schedule_rzr(n, ns, zone);
>          break;
>      default:
>          zone->d.za = 0;
> @@ -555,6 +641,7 @@ static void nvme_auto_transition_zone(NvmeCtrl *n, NvmeNamespace *ns,
>              zone->d.za &= ~(NVME_ZA_FINISH_RECOMMENDED |
>                              NVME_ZA_RESET_RECOMMENDED);
>              zone->d.za |= NVME_ZA_FINISHED_BY_CTLR;
> +            zone->flags = 0;
>              zone->tstamp = 0;
>              trace_pci_nvme_zone_finished_by_controller(zone->d.zslba);
>          }
> @@ -2624,6 +2711,11 @@ static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
>      n->num_zones = nz;
>      n->zone_array_size = sizeof(NvmeZone) * nz;
>
> +    n->params.rzr_delay_usec *= SCALE_MS;
> +    n->params.rrl_usec *= SCALE_MS;
> +    n->params.fzr_delay_usec *= SCALE_MS;
> +    n->params.frl_usec *= SCALE_MS;
> +

I would prefer that user-given parameters are not changed like this.
Setting defaults for various reasons is OK, but when the meaning of the
parameter changes (like the scale), it is confusing. I would suggest
that the namespace gets the set of *_usec members and that the
parameters are named without the usec suffix. (Again, a sketch follows
the quoted patch.)

>      /* Make sure that the values of all Zoned Command Set properties are sane */
>      if (n->params.max_open_zones > nz) {
>          n->params.max_open_zones = nz;
> @@ -2651,6 +2743,8 @@ static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
>      /* MAR/MOR are zeroes-based, 0xffffffff means no limit */
>      ns->id_ns_zoned->mar = cpu_to_le32(n->params.max_active_zones - 1);
>      ns->id_ns_zoned->mor = cpu_to_le32(n->params.max_open_zones - 1);
> +    ns->id_ns_zoned->rrl = cpu_to_le32(n->params.rrl_usec / (1000 * SCALE_MS));
> +    ns->id_ns_zoned->frl = cpu_to_le32(n->params.frl_usec / (1000 * SCALE_MS));
>      ns->id_ns_zoned->zoc = cpu_to_le16(n->params.active_excursions ? 0x2 : 0);
>      ns->id_ns_zoned->ozcs = n->params.cross_zone_read ? 0x01 : 0x00;
>
> @@ -3012,6 +3106,11 @@ static Property nvme_props[] = {
>      DEFINE_PROP_UINT32("zone_append_max_size", NvmeCtrl, params.zamds_bs, 0),
>      DEFINE_PROP_INT32("max_active", NvmeCtrl, params.max_active_zones, 0),
>      DEFINE_PROP_INT32("max_open", NvmeCtrl, params.max_open_zones, 0),
> +    DEFINE_PROP_UINT64("reset_rcmnd_delay", NvmeCtrl, params.rzr_delay_usec, 0),
> +    DEFINE_PROP_UINT64("reset_rcmnd_limit", NvmeCtrl, params.rrl_usec, 0),
> +    DEFINE_PROP_UINT64("finish_rcmnd_delay", NvmeCtrl,
> +                       params.fzr_delay_usec, 0),
> +    DEFINE_PROP_UINT64("finish_rcmnd_limit", NvmeCtrl, params.frl_usec, 0),
>      DEFINE_PROP_BOOL("cross_zone_read", NvmeCtrl, params.cross_zone_read, true),
>      DEFINE_PROP_BOOL("active_excursions", NvmeCtrl, params.active_excursions,
>                       false),
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 8a0aaeb09a..be1920f1ef 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -22,6 +22,10 @@ typedef struct NvmeParams {
>      uint64_t zone_capacity;
>      int32_t max_active_zones;
>      int32_t max_open_zones;
> +    uint64_t rzr_delay_usec;
> +    uint64_t rrl_usec;
> +    uint64_t fzr_delay_usec;
> +    uint64_t frl_usec;
>  } NvmeParams;
>
>  typedef struct NvmeAsyncEvent {
> @@ -77,12 +81,19 @@ typedef struct NvmeCQueue {
>      QTAILQ_HEAD(, NvmeRequest) req_list;
>  } NvmeCQueue;
>
> +enum NvmeZoneFlags {
> +    NVME_ZFLAGS_TS_DELAY = 1 << 0,
> +    NVME_ZFLAGS_SET_RZR  = 1 << 1,
> +    NVME_ZFLAGS_SET_FZR  = 1 << 2,
> +};
> +
>  typedef struct NvmeZone {
>      NvmeZoneDescr d;
>      uint64_t tstamp;
> +    uint32_t flags;
>      uint32_t next;
>      uint32_t prev;
> -    uint8_t rsvd80[8];
> +    uint8_t rsvd84[4];
>  } NvmeZone;
>
>  #define NVME_ZONE_LIST_NIL    UINT_MAX
> --
> 2.21.0
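On the clock question, something like this (completely untested, and
only one helper shown) is what I have in mind: stamp the zones with
QEMU_CLOCK_VIRTUAL, which only advances while the guest runs, so a
stopped VM does not accrue time toward the reset/finish recommendations.
Apart from the clock constant and folding the two flag clears into one,
this is the helper from the patch, unchanged:

    /* Sketch only: same helper as in the patch, but stamped with the
     * virtual clock, which stops ticking while emulation is stopped. */
    static void nvme_set_rzr(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone)
    {
        assert(zone->flags & NVME_ZFLAGS_SET_RZR);
        zone->tstamp = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
        zone->flags &= ~(NVME_ZFLAGS_TS_DELAY | NVME_ZFLAGS_SET_RZR);
        zone->d.za |= NVME_ZA_RESET_RECOMMENDED;
        trace_pci_nvme_zone_reset_recommended(zone->d.zslba);
    }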
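And for the parameter scaling, a sketch of the split I am suggesting.
The field names here (params.rrl and friends without the suffix, and the
derived *_ns members on the namespace) are made up for illustration; the
point is that the user-given values keep their millisecond meaning for
the lifetime of the device, and the values used with the clock are
derived once into separate members instead of rescaling params in place:

    /* Hypothetical layout: properties stay in milliseconds and are never
     * rescaled; derived nanosecond values live on the namespace. */
    static void nvme_ns_init_zoned_timeouts(NvmeCtrl *n, NvmeNamespace *ns)
    {
        /* SCALE_MS is nanoseconds per millisecond (1000000) */
        ns->rzr_delay_ns = n->params.rzr_delay * SCALE_MS;
        ns->rrl_ns       = n->params.rrl * SCALE_MS;
        ns->fzr_delay_ns = n->params.fzr_delay * SCALE_MS;
        ns->frl_ns       = n->params.frl * SCALE_MS;

        /* RRL/FRL in the identify data structure are reported in seconds,
         * so the conversion also becomes a plain ms-to-s division. */
        ns->id_ns_zoned->rrl = cpu_to_le32(n->params.rrl / 1000);
        ns->id_ns_zoned->frl = cpu_to_le32(n->params.frl / 1000);
    }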
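One last small observation on the nvme.h hunk: the rsvd80[8] to
rsvd84[4] change is what keeps sizeof(NvmeZone) stable after the 4-byte
flags field is added. A compile-time assertion (not part of the patch,
and assuming NvmeZoneDescr is the usual 64-byte ZNS zone descriptor)
would document that invariant:

    /* Not in the patch: tstamp(8) + flags(4) + next(4) + prev(4) +
     * rsvd84[4] pad the 64-byte descriptor to an 88-byte zone entry. */
    QEMU_BUILD_BUG_ON(sizeof(NvmeZone) != 88);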