Message ID | 20200928023528.15260-7-dmitry.fomichev@wdc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set | expand |
On Mon, Sep 28, 2020 at 11:35:20AM +0900, Dmitry Fomichev wrote: > From: Niklas Cassel <niklas.cassel@wdc.com> > > In NVMe, a namespace is active if it exists and is attached to the > controller. > > CAP.CSS (together with the I/O Command Set data structure) defines what > command sets are supported by the controller. > > CC.CSS (together with Set Profile) can be set to enable a subset of the > available command sets. The namespaces belonging to a disabled command set > will not be able to attach to the controller, and will thus be inactive. > > E.g., if the user sets CC.CSS to Admin Only, NVM namespaces should be > marked as inactive. > > The identify namespace, the identify namespace CSI specific, and the namespace > list commands have two different versions, one that only shows active > namespaces, and the other version that shows existing namespaces, regardless > of whether the namespace is attached or not. > > Add an attached member to struct NvmeNamespace, and implement the missing CNS > commands. > > The added functionality will also simplify the implementation of namespace > management in the future, since namespace management can also attach and > detach namespaces. Following my previous discussion with Klaus, I think we need to rewrite this commit message completely: Subject: hw/block/nvme: Add support for allocated CNS command variants Many CNS commands have "allocated" command variants. These includes a namespace as long as it is allocated (i.e. a namespace is included regardless if it is active (attached) or not.) While these commands are optional (they are mandatory for controllers supporting the namespace attachment command), our QEMU implementation is more complete by actually providing support for these CNS values. However, since our QEMU model currently does not support the namespace attachment command, these new allocated CNS commands will return the same result as the active CNS command variants. In NVMe, a namespace is active if it exists and is attached to the controller. CAP.CSS (together with the I/O Command Set data structure) defines what command sets are supported by the controller. CC.CSS (together with Set Profile) can be set to enable a subset of the available command sets. Even if a user configures CC.CSS to e.g. Admin only, NVM namespaces will still be attached (and thus marked as active). Similarly, if a user configures CC.CSS to e.g. NVM, ZNS namespaces will still be attached (and thus marked as active). However, any operation from a disabled command set will result in a Invalid Command Opcode. Add an attached struct member for struct NvmeNamespace, so that we lay the foundation for namespace attachment support. Also implement logic in the new CNS values to include/exclude namespaces based on this new struct member. The only thing missing hooking up the actual Namespace Attachment command opcode, which allows a user to toggle the attached variable per namespace. The reason for not hooking up this command completely is because the NVMe specification requires that the namespace managment command is supported if the namespacement attachment command is supported. > > Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com> > Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com> > --- > hw/block/nvme-ns.h | 1 + > hw/block/nvme.c | 60 ++++++++++++++++++++++++++++++++++++++------ > include/block/nvme.h | 20 +++++++++------ > 3 files changed, 65 insertions(+), 16 deletions(-) > > diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h > index cca23bc0b3..acdb76f058 100644 > --- a/hw/block/nvme-ns.h > +++ b/hw/block/nvme-ns.h > @@ -22,6 +22,7 @@ > typedef struct NvmeNamespaceParams { > uint32_t nsid; > uint8_t csi; > + bool attached; > QemuUUID uuid; > } NvmeNamespaceParams; > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > index 4ec1ddc90a..63ad03d6d6 100644 > --- a/hw/block/nvme.c > +++ b/hw/block/nvme.c We need to add an additional check in nvme_io_cmd() that returns Invalid Command Opcode when CC.CSS == Admin only. > @@ -1523,7 +1523,8 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) > return NVME_INVALID_FIELD | NVME_DNR; > } > > -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req) > +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, > + bool only_active) > { > NvmeNamespace *ns; > NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > @@ -1540,11 +1541,16 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req) > return nvme_rpt_empty_id_struct(n, req); > } > > + if (only_active && !ns->params.attached) { > + return nvme_rpt_empty_id_struct(n, req); > + } > + > return nvme_dma(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), > DMA_DIRECTION_FROM_DEVICE, req); > } > > -static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) > +static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, > + bool only_active) > { > NvmeNamespace *ns; > NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > @@ -1561,6 +1567,10 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) > return nvme_rpt_empty_id_struct(n, req); > } > > + if (only_active && !ns->params.attached) { > + return nvme_rpt_empty_id_struct(n, req); > + } > + > if (c->csi == NVME_CSI_NVM) { > return nvme_rpt_empty_id_struct(n, req); > } > @@ -1568,7 +1578,8 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) > return NVME_INVALID_FIELD | NVME_DNR; > } > > -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) > +static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, > + bool only_active) > { > NvmeNamespace *ns; > NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > @@ -1598,6 +1609,9 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) > if (ns->params.nsid < min_nsid) { > continue; > } > + if (only_active && !ns->params.attached) { > + continue; > + } > list_ptr[j++] = cpu_to_le32(ns->params.nsid); > if (j == data_len / sizeof(uint32_t)) { > break; > @@ -1607,7 +1621,8 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) > return nvme_dma(n, list, data_len, DMA_DIRECTION_FROM_DEVICE, req); > } > > -static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req) > +static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, > + bool only_active) > { > NvmeNamespace *ns; > NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > @@ -1631,6 +1646,9 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req) > if (ns->params.nsid < min_nsid) { > continue; > } > + if (only_active && !ns->params.attached) { > + continue; > + } > list_ptr[j++] = cpu_to_le32(ns->params.nsid); > if (j == data_len / sizeof(uint32_t)) { > break; > @@ -1700,17 +1718,25 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) > > switch (le32_to_cpu(c->cns)) { > case NVME_ID_CNS_NS: > - return nvme_identify_ns(n, req); > + return nvme_identify_ns(n, req, true); > case NVME_ID_CNS_CS_NS: > - return nvme_identify_ns_csi(n, req); > + return nvme_identify_ns_csi(n, req, true); > + case NVME_ID_CNS_NS_PRESENT: > + return nvme_identify_ns(n, req, false); > + case NVME_ID_CNS_CS_NS_PRESENT: > + return nvme_identify_ns_csi(n, req, false); > case NVME_ID_CNS_CTRL: > return nvme_identify_ctrl(n, req); > case NVME_ID_CNS_CS_CTRL: > return nvme_identify_ctrl_csi(n, req); > case NVME_ID_CNS_NS_ACTIVE_LIST: > - return nvme_identify_nslist(n, req); > + return nvme_identify_nslist(n, req, true); > case NVME_ID_CNS_CS_NS_ACTIVE_LIST: > - return nvme_identify_nslist_csi(n, req); > + return nvme_identify_nslist_csi(n, req, true); > + case NVME_ID_CNS_NS_PRESENT_LIST: > + return nvme_identify_nslist(n, req, false); > + case NVME_ID_CNS_CS_NS_PRESENT_LIST: > + return nvme_identify_nslist_csi(n, req, false); > case NVME_ID_CNS_NS_DESCR_LIST: > return nvme_identify_ns_descr_list(n, req); > case NVME_ID_CNS_IO_COMMAND_SET: > @@ -2188,8 +2214,10 @@ static void nvme_clear_ctrl(NvmeCtrl *n) > > static int nvme_start_ctrl(NvmeCtrl *n) > { > + NvmeNamespace *ns; > uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; > uint32_t page_size = 1 << page_bits; > + int i; > > if (unlikely(n->cq[0])) { > trace_pci_nvme_err_startfail_cq(); > @@ -2276,6 +2304,22 @@ static int nvme_start_ctrl(NvmeCtrl *n) > nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, > NVME_AQA_ASQS(n->bar.aqa) + 1); > > + for (i = 1; i <= n->num_namespaces; i++) { > + ns = nvme_ns(n, i); > + if (!ns) { > + continue; > + } > + ns->params.attached = false; > + switch (ns->params.csi) { > + case NVME_CSI_NVM: > + if (NVME_CC_CSS(n->bar.cc) == CSS_NVM_ONLY || > + NVME_CC_CSS(n->bar.cc) == CSS_CSI) { > + ns->params.attached = true; > + } > + break; > + } > + } > + Considering that the controller doesn't attach/detach namespaces belonging to command sets that it doesn't support, I think a nicer way is to remove this for-loop, and instead, in nvme_ns_setup() or nvme_ns_init(), always set attached = true. (Since we currently don't support namespace attachment command). The person that implements the last piece of namespace management and namespace attachment will have to deal with reading "attached" from some kind of persistent state and setting it accordingly. > nvme_set_timestamp(n, 0ULL); > > QTAILQ_INIT(&n->aer_queue); > diff --git a/include/block/nvme.h b/include/block/nvme.h > index 4587311783..b182fe40b2 100644 > --- a/include/block/nvme.h > +++ b/include/block/nvme.h > @@ -804,14 +804,18 @@ typedef struct QEMU_PACKED NvmePSD { > #define NVME_IDENTIFY_DATA_SIZE 4096 > > enum NvmeIdCns { > - NVME_ID_CNS_NS = 0x00, > - NVME_ID_CNS_CTRL = 0x01, > - NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, > - NVME_ID_CNS_NS_DESCR_LIST = 0x03, > - NVME_ID_CNS_CS_NS = 0x05, > - NVME_ID_CNS_CS_CTRL = 0x06, > - NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, > - NVME_ID_CNS_IO_COMMAND_SET = 0x1c, > + NVME_ID_CNS_NS = 0x00, > + NVME_ID_CNS_CTRL = 0x01, > + NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, > + NVME_ID_CNS_NS_DESCR_LIST = 0x03, > + NVME_ID_CNS_CS_NS = 0x05, > + NVME_ID_CNS_CS_CTRL = 0x06, > + NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, > + NVME_ID_CNS_NS_PRESENT_LIST = 0x10, > + NVME_ID_CNS_NS_PRESENT = 0x11, > + NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, > + NVME_ID_CNS_CS_NS_PRESENT = 0x1b, > + NVME_ID_CNS_IO_COMMAND_SET = 0x1c, > }; > > typedef struct QEMU_PACKED NvmeIdCtrl { > -- > 2.21.0 >
On Wed, 2020-09-30 at 13:50 +0000, Niklas Cassel wrote: > On Mon, Sep 28, 2020 at 11:35:20AM +0900, Dmitry Fomichev wrote: > > From: Niklas Cassel <niklas.cassel@wdc.com> > > > > In NVMe, a namespace is active if it exists and is attached to the > > controller. > > > > CAP.CSS (together with the I/O Command Set data structure) defines what > > command sets are supported by the controller. > > > > CC.CSS (together with Set Profile) can be set to enable a subset of the > > available command sets. The namespaces belonging to a disabled command set > > will not be able to attach to the controller, and will thus be inactive. > > > > E.g., if the user sets CC.CSS to Admin Only, NVM namespaces should be > > marked as inactive. > > > > The identify namespace, the identify namespace CSI specific, and the namespace > > list commands have two different versions, one that only shows active > > namespaces, and the other version that shows existing namespaces, regardless > > of whether the namespace is attached or not. > > > > Add an attached member to struct NvmeNamespace, and implement the missing CNS > > commands. > > > > The added functionality will also simplify the implementation of namespace > > management in the future, since namespace management can also attach and > > detach namespaces. > > Following my previous discussion with Klaus, > I think we need to rewrite this commit message completely: > > Subject: hw/block/nvme: Add support for allocated CNS command variants > > Many CNS commands have "allocated" command variants. > These includes a namespace as long as it is allocated > (i.e. a namespace is included regardless if it is active (attached) > or not.) > > While these commands are optional (they are mandatory for controllers > supporting the namespace attachment command), our QEMU implementation > is more complete by actually providing support for these CNS values. > > However, since our QEMU model currently does not support the namespace > attachment command, these new allocated CNS commands will return the same > result as the active CNS command variants. > > In NVMe, a namespace is active if it exists and is attached to the > controller. > > CAP.CSS (together with the I/O Command Set data structure) defines what > command sets are supported by the controller. > > CC.CSS (together with Set Profile) can be set to enable a subset of the > available command sets. > > Even if a user configures CC.CSS to e.g. Admin only, NVM namespaces > will still be attached (and thus marked as active). > Similarly, if a user configures CC.CSS to e.g. NVM, ZNS namespaces > will still be attached (and thus marked as active). > > However, any operation from a disabled command set will result in a > Invalid Command Opcode. > > Add an attached struct member for struct NvmeNamespace, > so that we lay the foundation for namespace attachment > support. Also implement logic in the new CNS values to > include/exclude namespaces based on this new struct member. > The only thing missing hooking up the actual Namespace Attachment > command opcode, which allows a user to toggle the attached > variable per namespace. The reason for not hooking up this > command completely is because the NVMe specification > requires that the namespace managment command is supported > if the namespacement attachment command is supported. > OK, putting this in. > > > Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com> > > Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com> > > --- > > hw/block/nvme-ns.h | 1 + > > hw/block/nvme.c | 60 ++++++++++++++++++++++++++++++++++++++------ > > include/block/nvme.h | 20 +++++++++------ > > 3 files changed, 65 insertions(+), 16 deletions(-) > > > > diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h > > index cca23bc0b3..acdb76f058 100644 > > --- a/hw/block/nvme-ns.h > > +++ b/hw/block/nvme-ns.h > > @@ -22,6 +22,7 @@ > > typedef struct NvmeNamespaceParams { > > uint32_t nsid; > > uint8_t csi; > > + bool attached; > > QemuUUID uuid; > > } NvmeNamespaceParams; > > > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > > index 4ec1ddc90a..63ad03d6d6 100644 > > --- a/hw/block/nvme.c > > +++ b/hw/block/nvme.c > > We need to add an additional check in nvme_io_cmd() > that returns Invalid Command Opcode when CC.CSS == Admin only. > I think Keith has this addition already queued. > > @@ -1523,7 +1523,8 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) > > return NVME_INVALID_FIELD | NVME_DNR; > > } > > > > -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req) > > +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, > > + bool only_active) > > { > > NvmeNamespace *ns; > > NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > > @@ -1540,11 +1541,16 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req) > > return nvme_rpt_empty_id_struct(n, req); > > } > > > > + if (only_active && !ns->params.attached) { > > + return nvme_rpt_empty_id_struct(n, req); > > + } > > + > > return nvme_dma(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), > > DMA_DIRECTION_FROM_DEVICE, req); > > } > > > > -static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) > > +static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, > > + bool only_active) > > { > > NvmeNamespace *ns; > > NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > > @@ -1561,6 +1567,10 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) > > return nvme_rpt_empty_id_struct(n, req); > > } > > > > + if (only_active && !ns->params.attached) { > > + return nvme_rpt_empty_id_struct(n, req); > > + } > > + > > if (c->csi == NVME_CSI_NVM) { > > return nvme_rpt_empty_id_struct(n, req); > > } > > @@ -1568,7 +1578,8 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) > > return NVME_INVALID_FIELD | NVME_DNR; > > } > > > > -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) > > +static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, > > + bool only_active) > > { > > NvmeNamespace *ns; > > NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > > @@ -1598,6 +1609,9 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) > > if (ns->params.nsid < min_nsid) { > > continue; > > } > > + if (only_active && !ns->params.attached) { > > + continue; > > + } > > list_ptr[j++] = cpu_to_le32(ns->params.nsid); > > if (j == data_len / sizeof(uint32_t)) { > > break; > > @@ -1607,7 +1621,8 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) > > return nvme_dma(n, list, data_len, DMA_DIRECTION_FROM_DEVICE, req); > > } > > > > -static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req) > > +static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, > > + bool only_active) > > { > > NvmeNamespace *ns; > > NvmeIdentify *c = (NvmeIdentify *)&req->cmd; > > @@ -1631,6 +1646,9 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req) > > if (ns->params.nsid < min_nsid) { > > continue; > > } > > + if (only_active && !ns->params.attached) { > > + continue; > > + } > > list_ptr[j++] = cpu_to_le32(ns->params.nsid); > > if (j == data_len / sizeof(uint32_t)) { > > break; > > @@ -1700,17 +1718,25 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) > > > > switch (le32_to_cpu(c->cns)) { > > case NVME_ID_CNS_NS: > > - return nvme_identify_ns(n, req); > > + return nvme_identify_ns(n, req, true); > > case NVME_ID_CNS_CS_NS: > > - return nvme_identify_ns_csi(n, req); > > + return nvme_identify_ns_csi(n, req, true); > > + case NVME_ID_CNS_NS_PRESENT: > > + return nvme_identify_ns(n, req, false); > > + case NVME_ID_CNS_CS_NS_PRESENT: > > + return nvme_identify_ns_csi(n, req, false); > > case NVME_ID_CNS_CTRL: > > return nvme_identify_ctrl(n, req); > > case NVME_ID_CNS_CS_CTRL: > > return nvme_identify_ctrl_csi(n, req); > > case NVME_ID_CNS_NS_ACTIVE_LIST: > > - return nvme_identify_nslist(n, req); > > + return nvme_identify_nslist(n, req, true); > > case NVME_ID_CNS_CS_NS_ACTIVE_LIST: > > - return nvme_identify_nslist_csi(n, req); > > + return nvme_identify_nslist_csi(n, req, true); > > + case NVME_ID_CNS_NS_PRESENT_LIST: > > + return nvme_identify_nslist(n, req, false); > > + case NVME_ID_CNS_CS_NS_PRESENT_LIST: > > + return nvme_identify_nslist_csi(n, req, false); > > case NVME_ID_CNS_NS_DESCR_LIST: > > return nvme_identify_ns_descr_list(n, req); > > case NVME_ID_CNS_IO_COMMAND_SET: > > @@ -2188,8 +2214,10 @@ static void nvme_clear_ctrl(NvmeCtrl *n) > > > > static int nvme_start_ctrl(NvmeCtrl *n) > > { > > + NvmeNamespace *ns; > > uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; > > uint32_t page_size = 1 << page_bits; > > + int i; > > > > if (unlikely(n->cq[0])) { > > trace_pci_nvme_err_startfail_cq(); > > @@ -2276,6 +2304,22 @@ static int nvme_start_ctrl(NvmeCtrl *n) > > nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, > > NVME_AQA_ASQS(n->bar.aqa) + 1); > > > > + for (i = 1; i <= n->num_namespaces; i++) { > > + ns = nvme_ns(n, i); > > + if (!ns) { > > + continue; > > + } > > + ns->params.attached = false; > > + switch (ns->params.csi) { > > + case NVME_CSI_NVM: > > + if (NVME_CC_CSS(n->bar.cc) == CSS_NVM_ONLY || > > + NVME_CC_CSS(n->bar.cc) == CSS_CSI) { > > + ns->params.attached = true; > > + } > > + break; > > + } > > + } > > + > > Considering that the controller doesn't attach/detach > namespaces belonging to command sets that it doesn't > support, I think a nicer way is to remove this for-loop, > and instead, in nvme_ns_setup() or nvme_ns_init(), > always set attached = true. (Since we currently don't > support namespace attachment command). > > The person that implements the last piece of namespace > management and namespace attachment will have to deal > with reading "attached" from some kind of persistent state I did some spec reading on this topic and it seems that this logic is necessary precisely because there is no attach/detach command available. Such a command would prevent attachment of a zoned namespace if CC.CSS is NVM_ONLY, right? But since we have a static config, we need to do this IMO. Also, 6.1.5 of the spec says that any operation that uses an inactive NSID shall fail with Invalid Field. I am adding a few bits to fail all i/o commands and set/get features attempted on inactive namespaces. > and setting it accordingly. > > > nvme_set_timestamp(n, 0ULL); > > > > QTAILQ_INIT(&n->aer_queue); > > diff --git a/include/block/nvme.h b/include/block/nvme.h > > index 4587311783..b182fe40b2 100644 > > --- a/include/block/nvme.h > > +++ b/include/block/nvme.h > > @@ -804,14 +804,18 @@ typedef struct QEMU_PACKED NvmePSD { > > #define NVME_IDENTIFY_DATA_SIZE 4096 > > > > enum NvmeIdCns { > > - NVME_ID_CNS_NS = 0x00, > > - NVME_ID_CNS_CTRL = 0x01, > > - NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, > > - NVME_ID_CNS_NS_DESCR_LIST = 0x03, > > - NVME_ID_CNS_CS_NS = 0x05, > > - NVME_ID_CNS_CS_CTRL = 0x06, > > - NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, > > - NVME_ID_CNS_IO_COMMAND_SET = 0x1c, > > + NVME_ID_CNS_NS = 0x00, > > + NVME_ID_CNS_CTRL = 0x01, > > + NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, > > + NVME_ID_CNS_NS_DESCR_LIST = 0x03, > > + NVME_ID_CNS_CS_NS = 0x05, > > + NVME_ID_CNS_CS_CTRL = 0x06, > > + NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, > > + NVME_ID_CNS_NS_PRESENT_LIST = 0x10, > > + NVME_ID_CNS_NS_PRESENT = 0x11, > > + NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, > > + NVME_ID_CNS_CS_NS_PRESENT = 0x1b, > > + NVME_ID_CNS_IO_COMMAND_SET = 0x1c, > > }; > > > > typedef struct QEMU_PACKED NvmeIdCtrl { > > -- > > 2.21.0
On Sun, Oct 04, 2020 at 11:54:13PM +0000, Dmitry Fomichev wrote: > On Wed, 2020-09-30 at 13:50 +0000, Niklas Cassel wrote: > > On Mon, Sep 28, 2020 at 11:35:20AM +0900, Dmitry Fomichev wrote: > > > From: Niklas Cassel <niklas.cassel@wdc.com> > > > > > > In NVMe, a namespace is active if it exists and is attached to the > > > controller. > > > > > > CAP.CSS (together with the I/O Command Set data structure) defines what > > > command sets are supported by the controller. > > > > > > CC.CSS (together with Set Profile) can be set to enable a subset of the > > > available command sets. The namespaces belonging to a disabled command set > > > will not be able to attach to the controller, and will thus be inactive. > > > > > > E.g., if the user sets CC.CSS to Admin Only, NVM namespaces should be > > > marked as inactive. > > > > > > The identify namespace, the identify namespace CSI specific, and the namespace > > > list commands have two different versions, one that only shows active > > > namespaces, and the other version that shows existing namespaces, regardless > > > of whether the namespace is attached or not. > > > > > > Add an attached member to struct NvmeNamespace, and implement the missing CNS > > > commands. > > > > > > The added functionality will also simplify the implementation of namespace > > > management in the future, since namespace management can also attach and > > > detach namespaces. > > > > Following my previous discussion with Klaus, > > I think we need to rewrite this commit message completely: > > > > Subject: hw/block/nvme: Add support for allocated CNS command variants > > > > Many CNS commands have "allocated" command variants. > > These includes a namespace as long as it is allocated > > (i.e. a namespace is included regardless if it is active (attached) > > or not.) > > > > While these commands are optional (they are mandatory for controllers > > supporting the namespace attachment command), our QEMU implementation > > is more complete by actually providing support for these CNS values. > > > > However, since our QEMU model currently does not support the namespace > > attachment command, these new allocated CNS commands will return the same > > result as the active CNS command variants. > > > > In NVMe, a namespace is active if it exists and is attached to the > > controller. > > > > CAP.CSS (together with the I/O Command Set data structure) defines what > > command sets are supported by the controller. > > > > CC.CSS (together with Set Profile) can be set to enable a subset of the > > available command sets. > > > > Even if a user configures CC.CSS to e.g. Admin only, NVM namespaces > > will still be attached (and thus marked as active). > > Similarly, if a user configures CC.CSS to e.g. NVM, ZNS namespaces > > will still be attached (and thus marked as active). > > > > However, any operation from a disabled command set will result in a > > Invalid Command Opcode. > > > > Add an attached struct member for struct NvmeNamespace, > > so that we lay the foundation for namespace attachment > > support. Also implement logic in the new CNS values to > > include/exclude namespaces based on this new struct member. > > The only thing missing hooking up the actual Namespace Attachment > > command opcode, which allows a user to toggle the attached > > variable per namespace. The reason for not hooking up this > > command completely is because the NVMe specification > > requires that the namespace managment command is supported > > if the namespacement attachment command is supported. > > > (snip) > > > @@ -2276,6 +2304,22 @@ static int nvme_start_ctrl(NvmeCtrl *n) > > > nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, > > > NVME_AQA_ASQS(n->bar.aqa) + 1); > > > > > > + for (i = 1; i <= n->num_namespaces; i++) { > > > + ns = nvme_ns(n, i); > > > + if (!ns) { > > > + continue; > > > + } > > > + ns->params.attached = false; > > > + switch (ns->params.csi) { > > > + case NVME_CSI_NVM: > > > + if (NVME_CC_CSS(n->bar.cc) == CSS_NVM_ONLY || > > > + NVME_CC_CSS(n->bar.cc) == CSS_CSI) { > > > + ns->params.attached = true; > > > + } > > > + break; > > > + } > > > + } > > > + > > > > Considering that the controller doesn't attach/detach > > namespaces belonging to command sets that it doesn't > > support, I think a nicer way is to remove this for-loop, > > and instead, in nvme_ns_setup() or nvme_ns_init(), > > always set attached = true. (Since we currently don't > > support namespace attachment command). > > > > The person that implements the last piece of namespace > > management and namespace attachment will have to deal > > with reading "attached" from some kind of persistent state > > > I did some spec reading on this topic and it seems that > this logic is necessary precisely because there is no > attach/detach command available. Such a command would > prevent attachment of a zoned namespace if CC.CSS is > NVM_ONLY, right? But since we have a static config, we > need to do this IMO. As far as I understand the spec, a NVM Command Set namespace will be attached to the controller (thus active), regardless if you start the controller with CC.CSS = Admin only, or CC.CSS = NVM. (And as far as I understand, this doesn't depend on if the controller supports the namespace attachment command or not.) See the register description for CC.CSS: "If bit 44 is set to ‘1’ in the Command Sets Supported (CSS) field, then the value 111b indicates that only the Admin Command Set is supported and that no I/O Command Set or I/O Command Set Specific Admin commands are supported. When only the Admin Command Set is supported, any command submitted on an I/O Submission Queue and any I/O Command Set Specific Admin command submitted on the Admin Submission Queue is completed with status Invalid Command Opcode." So I think that no matter what CC.CSS setting you have, no namespace will ever be detached by the controller. It will still be attached, but you will get Invalid Command Opcode if sending any command. I assume that CC.CSS is way older than namespace management, so that is probably why CC.CSS simply causes "Invalid Command Opcode" rather than detaching namespaces. > > Also, 6.1.5 of the spec says that any operation that uses > an inactive NSID shall fail with Invalid Field. I am > adding a few bits to fail all i/o commands and set/get > features attempted on inactive namespaces. Inactive NSID == a NSID that is not attached. As far as I understand, the controller itself will never detach a namespace. And since the QEMU model right now does not support namespace management, neither will the user. So I don't see that we will have any inactive namespace. Therefore, I suggested that we remove this for-loop. (Or drop this patch all together, but I do think that it provides value to have the additional CNS commands implemented, even if they will return the same result as the exiting active CNS commands.) Kind regards, Niklas
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h index cca23bc0b3..acdb76f058 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -22,6 +22,7 @@ typedef struct NvmeNamespaceParams { uint32_t nsid; uint8_t csi; + bool attached; QemuUUID uuid; } NvmeNamespaceParams; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 4ec1ddc90a..63ad03d6d6 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1523,7 +1523,8 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, + bool only_active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)&req->cmd; @@ -1540,11 +1541,16 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req) return nvme_rpt_empty_id_struct(n, req); } + if (only_active && !ns->params.attached) { + return nvme_rpt_empty_id_struct(n, req); + } + return nvme_dma(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), DMA_DIRECTION_FROM_DEVICE, req); } -static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, + bool only_active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)&req->cmd; @@ -1561,6 +1567,10 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) return nvme_rpt_empty_id_struct(n, req); } + if (only_active && !ns->params.attached) { + return nvme_rpt_empty_id_struct(n, req); + } + if (c->csi == NVME_CSI_NVM) { return nvme_rpt_empty_id_struct(n, req); } @@ -1568,7 +1578,8 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, + bool only_active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)&req->cmd; @@ -1598,6 +1609,9 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) if (ns->params.nsid < min_nsid) { continue; } + if (only_active && !ns->params.attached) { + continue; + } list_ptr[j++] = cpu_to_le32(ns->params.nsid); if (j == data_len / sizeof(uint32_t)) { break; @@ -1607,7 +1621,8 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req) return nvme_dma(n, list, data_len, DMA_DIRECTION_FROM_DEVICE, req); } -static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, + bool only_active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)&req->cmd; @@ -1631,6 +1646,9 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req) if (ns->params.nsid < min_nsid) { continue; } + if (only_active && !ns->params.attached) { + continue; + } list_ptr[j++] = cpu_to_le32(ns->params.nsid); if (j == data_len / sizeof(uint32_t)) { break; @@ -1700,17 +1718,25 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) switch (le32_to_cpu(c->cns)) { case NVME_ID_CNS_NS: - return nvme_identify_ns(n, req); + return nvme_identify_ns(n, req, true); case NVME_ID_CNS_CS_NS: - return nvme_identify_ns_csi(n, req); + return nvme_identify_ns_csi(n, req, true); + case NVME_ID_CNS_NS_PRESENT: + return nvme_identify_ns(n, req, false); + case NVME_ID_CNS_CS_NS_PRESENT: + return nvme_identify_ns_csi(n, req, false); case NVME_ID_CNS_CTRL: return nvme_identify_ctrl(n, req); case NVME_ID_CNS_CS_CTRL: return nvme_identify_ctrl_csi(n, req); case NVME_ID_CNS_NS_ACTIVE_LIST: - return nvme_identify_nslist(n, req); + return nvme_identify_nslist(n, req, true); case NVME_ID_CNS_CS_NS_ACTIVE_LIST: - return nvme_identify_nslist_csi(n, req); + return nvme_identify_nslist_csi(n, req, true); + case NVME_ID_CNS_NS_PRESENT_LIST: + return nvme_identify_nslist(n, req, false); + case NVME_ID_CNS_CS_NS_PRESENT_LIST: + return nvme_identify_nslist_csi(n, req, false); case NVME_ID_CNS_NS_DESCR_LIST: return nvme_identify_ns_descr_list(n, req); case NVME_ID_CNS_IO_COMMAND_SET: @@ -2188,8 +2214,10 @@ static void nvme_clear_ctrl(NvmeCtrl *n) static int nvme_start_ctrl(NvmeCtrl *n) { + NvmeNamespace *ns; uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; uint32_t page_size = 1 << page_bits; + int i; if (unlikely(n->cq[0])) { trace_pci_nvme_err_startfail_cq(); @@ -2276,6 +2304,22 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, NVME_AQA_ASQS(n->bar.aqa) + 1); + for (i = 1; i <= n->num_namespaces; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + ns->params.attached = false; + switch (ns->params.csi) { + case NVME_CSI_NVM: + if (NVME_CC_CSS(n->bar.cc) == CSS_NVM_ONLY || + NVME_CC_CSS(n->bar.cc) == CSS_CSI) { + ns->params.attached = true; + } + break; + } + } + nvme_set_timestamp(n, 0ULL); QTAILQ_INIT(&n->aer_queue); diff --git a/include/block/nvme.h b/include/block/nvme.h index 4587311783..b182fe40b2 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -804,14 +804,18 @@ typedef struct QEMU_PACKED NvmePSD { #define NVME_IDENTIFY_DATA_SIZE 4096 enum NvmeIdCns { - NVME_ID_CNS_NS = 0x00, - NVME_ID_CNS_CTRL = 0x01, - NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, - NVME_ID_CNS_NS_DESCR_LIST = 0x03, - NVME_ID_CNS_CS_NS = 0x05, - NVME_ID_CNS_CS_CTRL = 0x06, - NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, - NVME_ID_CNS_IO_COMMAND_SET = 0x1c, + NVME_ID_CNS_NS = 0x00, + NVME_ID_CNS_CTRL = 0x01, + NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, + NVME_ID_CNS_NS_DESCR_LIST = 0x03, + NVME_ID_CNS_CS_NS = 0x05, + NVME_ID_CNS_CS_CTRL = 0x06, + NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, + NVME_ID_CNS_NS_PRESENT_LIST = 0x10, + NVME_ID_CNS_NS_PRESENT = 0x11, + NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, + NVME_ID_CNS_CS_NS_PRESENT = 0x1b, + NVME_ID_CNS_IO_COMMAND_SET = 0x1c, }; typedef struct QEMU_PACKED NvmeIdCtrl {