Message ID | 20200204095208.269131-9-k.jensen@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | nvme: support NVMe v1.3d, SGLs and multiple namespaces | expand |
On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote: > This patch splits up nvme_realize into multiple individual functions, > each initializing a different subset of the device. > > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com> > --- > hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++----------------- > hw/block/nvme.h | 21 ++++++ > 2 files changed, 133 insertions(+), 63 deletions(-) > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > index e1810260d40b..81514eaef63a 100644 > --- a/hw/block/nvme.c > +++ b/hw/block/nvme.c > @@ -44,6 +44,7 @@ > #include "nvme.h" > > #define NVME_SPEC_VER 0x00010201 > +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE > > #define NVME_GUEST_ERR(trace, fmt, ...) \ > do { \ > @@ -1325,67 +1326,106 @@ static const MemoryRegionOps nvme_cmb_ops = { > }, > }; > > -static void nvme_realize(PCIDevice *pci_dev, Error **errp) > +static int nvme_check_constraints(NvmeCtrl *n, Error **errp) > { > - NvmeCtrl *n = NVME(pci_dev); > - NvmeIdCtrl *id = &n->id_ctrl; > - > - int i; > - int64_t bs_size; > - uint8_t *pci_conf; > - > - if (!n->params.num_queues) { > - error_setg(errp, "num_queues can't be zero"); > - return; > - } > + NvmeParams *params = &n->params; > > if (!n->conf.blk) { > - error_setg(errp, "drive property not set"); > - return; > + error_setg(errp, "nvme: block backend not configured"); > + return 1; As a matter of taste, negative values indicate error, and 0 is the success value. In Linux kernel this is even an official rule. 
> } > > - bs_size = blk_getlength(n->conf.blk); > - if (bs_size < 0) { > - error_setg(errp, "could not get backing file size"); > - return; > + if (!params->serial) { > + error_setg(errp, "nvme: serial not configured"); > + return 1; > } > > - if (!n->params.serial) { > - error_setg(errp, "serial property not set"); > - return; > + if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) { > + error_setg(errp, "nvme: invalid queue configuration"); Maybe something like "nvme: invalid queue count specified, should be between 1 and ..."? > + return 1; > } > + > + return 0; > +} > + > +static int nvme_init_blk(NvmeCtrl *n, Error **errp) > +{ > blkconf_blocksizes(&n->conf); > if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), > - false, errp)) { > - return; > + false, errp)) { > + return 1; > } > > - pci_conf = pci_dev->config; > - pci_conf[PCI_INTERRUPT_PIN] = 1; > - pci_config_set_prog_interface(pci_dev->config, 0x2); > - pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); > - pcie_endpoint_cap_init(pci_dev, 0x80); > + return 0; > +} > > +static void nvme_init_state(NvmeCtrl *n) > +{ > n->num_namespaces = 1; > n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); Isn't that wrong? First 4K of mmio (0x1000) is the registers, and that is followed by the doorbells, and each doorbell takes 8 bytes (assuming regular doorbell stride). so n->params.num_queues + 1 should be total number of queues, thus the 0x1004 should be 0x1000 IMHO. I might miss some rounding magic here though. 
> - n->ns_size = bs_size / (uint64_t)n->num_namespaces; > - > n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); > n->sq = g_new0(NvmeSQueue *, n->params.num_queues); > n->cq = g_new0(NvmeCQueue *, n->params.num_queues); > +} > > - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, > - "nvme", n->reg_size); > +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) > +{ > + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); It would be nice to have #define for CMB bar number > + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); > + > + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); > + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); > + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); > + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); > + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); > + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); > + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); > + > + n->cmbloc = n->bar.cmbloc; > + n->cmbsz = n->bar.cmbsz; > + > + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, > + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > + pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), Same here although since you read it here from the controller register, then maybe leave it as is. I prefer though for this kind of thing to have a #define and use it everywhere. > + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | > + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); > +} > + > +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) > +{ > + uint8_t *pci_conf = pci_dev->config; > + > + pci_conf[PCI_INTERRUPT_PIN] = 1; > + pci_config_set_prog_interface(pci_conf, 0x2); Nitpick: How about adding some #define for that as well? 
(I know that this code is copied as is but still) > + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); > + pci_config_set_device_id(pci_conf, 0x5845); > + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); > + pcie_endpoint_cap_init(pci_dev, 0x80); > + > + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", > + n->reg_size); Code on split lines should start at column right after the '(' Now its my turn to notice this - our checkpatch.pl doesn't check this, and I can't explain how often I am getting burnt on this myself. There are *lot* of these issues, I pointed out some of them but you should check all the patches for this. > pci_register_bar(pci_dev, 0, > PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, > &n->iomem); Split line alignment issue here as well. > msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL); > > + if (n->params.cmb_size_mb) { > + nvme_init_cmb(n, pci_dev); > + } > +} > + > +static void nvme_init_ctrl(NvmeCtrl *n) > +{ > + NvmeIdCtrl *id = &n->id_ctrl; > + NvmeParams *params = &n->params; > + uint8_t *pci_conf = n->parent_obj.config; > + > id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); > id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); > strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); > strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); > - strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); > + strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' '); > id->rab = 6; > id->ieee[0] = 0x00; > id->ieee[1] = 0x02; > @@ -1431,46 +1471,55 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) > > n->bar.vs = NVME_SPEC_VER; > n->bar.intmc = n->bar.intms = 0; > +} > > - if (n->params.cmb_size_mb) { > +static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) > +{ > + int64_t bs_size; > + NvmeIdNs *id_ns = &ns->id_ns; > > - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); > - 
NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); > + bs_size = blk_getlength(n->conf.blk); > + if (bs_size < 0) { > + error_setg_errno(errp, -bs_size, "blk_getlength"); > + return 1; > + } > > - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); > - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); > - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); > - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); > - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); > - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ > - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); > + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > + n->ns_size = bs_size; > > - n->cmbloc = n->bar.cmbloc; > - n->cmbsz = n->bar.cmbsz; > + id_ns->ncap = id_ns->nuse = id_ns->nsze = > + cpu_to_le64(nvme_ns_nlbas(n, ns)); I myself don't know how to align these splits to be honest. I would just split this into multiple statements. > > - n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > - memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, > - "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > - pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), > - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | > - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); > + return 0; > +} > > +static void nvme_realize(PCIDevice *pci_dev, Error **errp) > +{ > + NvmeCtrl *n = NVME(pci_dev); > + Error *local_err = NULL; > + int i; > + > + if (nvme_check_constraints(n, &local_err)) { > + error_propagate_prepend(errp, local_err, "nvme_check_constraints: "); Do we need that hint for the end user? 
> + return; > + } > + > + nvme_init_state(n); > + > + if (nvme_init_blk(n, &local_err)) { > + error_propagate_prepend(errp, local_err, "nvme_init_blk: "); Same here > + return; > } > > for (i = 0; i < n->num_namespaces; i++) { > - NvmeNamespace *ns = &n->namespaces[i]; > - NvmeIdNs *id_ns = &ns->id_ns; > - id_ns->nsfeat = 0; > - id_ns->nlbaf = 0; > - id_ns->flbas = 0; > - id_ns->mc = 0; > - id_ns->dpc = 0; > - id_ns->dps = 0; > - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > - id_ns->ncap = id_ns->nuse = id_ns->nsze = > - cpu_to_le64(n->ns_size >> > - id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); > + if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) { > + error_propagate_prepend(errp, local_err, "nvme_init_namespace: "); And here > + return; > + } > } > + > + nvme_init_pci(n, pci_dev); > + nvme_init_ctrl(n); > } > > static void nvme_exit(PCIDevice *pci_dev) > diff --git a/hw/block/nvme.h b/hw/block/nvme.h > index 9957c4a200e2..a867bdfabafd 100644 > --- a/hw/block/nvme.h > +++ b/hw/block/nvme.h > @@ -65,6 +65,22 @@ typedef struct NvmeNamespace { > NvmeIdNs id_ns; > } NvmeNamespace; > > +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns) > +{ Its not common to return a structure in C, usually pointer is returned to avoid copying. In this case this doesn't matter that much though. 
> + NvmeIdNs *id_ns = &ns->id_ns; > + return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; > +} > + > +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) > +{ > + return nvme_ns_lbaf(ns).ds; > +} > + > +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns) > +{ > + return 1 << nvme_ns_lbads(ns); > +} > + > #define TYPE_NVME "nvme" > #define NVME(obj) \ > OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) > @@ -101,4 +117,9 @@ typedef struct NvmeCtrl { > NvmeIdCtrl id_ctrl; > } NvmeCtrl; > > +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) > +{ > + return n->ns_size >> nvme_ns_lbads(ns); > +} Unless you need all these functions in the future, this feels like it is a bit verbose. > + > #endif /* HW_NVME_H */ Best regards, Maxim Levitsky
On Feb 12 11:27, Maxim Levitsky wrote: > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote: > > This patch splits up nvme_realize into multiple individual functions, > > each initializing a different subset of the device. > > > > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com> > > --- > > hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++----------------- > > hw/block/nvme.h | 21 ++++++ > > 2 files changed, 133 insertions(+), 63 deletions(-) > > > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > > index e1810260d40b..81514eaef63a 100644 > > --- a/hw/block/nvme.c > > +++ b/hw/block/nvme.c > > @@ -44,6 +44,7 @@ > > #include "nvme.h" > > > > #define NVME_SPEC_VER 0x00010201 > > +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE > > > > #define NVME_GUEST_ERR(trace, fmt, ...) \ > > do { \ > > @@ -1325,67 +1326,106 @@ static const MemoryRegionOps nvme_cmb_ops = { > > }, > > }; > > > > -static void nvme_realize(PCIDevice *pci_dev, Error **errp) > > +static int nvme_check_constraints(NvmeCtrl *n, Error **errp) > > { > > - NvmeCtrl *n = NVME(pci_dev); > > - NvmeIdCtrl *id = &n->id_ctrl; > > - > > - int i; > > - int64_t bs_size; > > - uint8_t *pci_conf; > > - > > - if (!n->params.num_queues) { > > - error_setg(errp, "num_queues can't be zero"); > > - return; > > - } > > + NvmeParams *params = &n->params; > > > > if (!n->conf.blk) { > > - error_setg(errp, "drive property not set"); > > - return; > > + error_setg(errp, "nvme: block backend not configured"); > > + return 1; > As a matter of taste, negative values indicate error, and 0 is the success value. > In Linux kernel this is even an official rule. > > } Fixed. 
> > > > - bs_size = blk_getlength(n->conf.blk); > > - if (bs_size < 0) { > > - error_setg(errp, "could not get backing file size"); > > - return; > > + if (!params->serial) { > > + error_setg(errp, "nvme: serial not configured"); > > + return 1; > > } > > > > - if (!n->params.serial) { > > - error_setg(errp, "serial property not set"); > > - return; > > + if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) { > > + error_setg(errp, "nvme: invalid queue configuration"); > Maybe something like "nvme: invalid queue count specified, should be between 1 and ..."? > > + return 1; > > } Fixed. > > + > > + return 0; > > +} > > + > > +static int nvme_init_blk(NvmeCtrl *n, Error **errp) > > +{ > > blkconf_blocksizes(&n->conf); > > if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), > > - false, errp)) { > > - return; > > + false, errp)) { > > + return 1; > > } > > > > - pci_conf = pci_dev->config; > > - pci_conf[PCI_INTERRUPT_PIN] = 1; > > - pci_config_set_prog_interface(pci_dev->config, 0x2); > > - pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); > > - pcie_endpoint_cap_init(pci_dev, 0x80); > > + return 0; > > +} > > > > +static void nvme_init_state(NvmeCtrl *n) > > +{ > > n->num_namespaces = 1; > > n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); > > Isn't that wrong? > First 4K of mmio (0x1000) is the registers, and that is followed by the doorbells, > and each doorbell takes 8 bytes (assuming regular doorbell stride). > so n->params.num_queues + 1 should be total number of queues, thus the 0x1004 should be 0x1000 IMHO. > I might miss some rounding magic here though. > Yeah. I think you are right. It all becomes slightly more fishy due to the num_queues device parameter being 1's based and accounts for the admin queue pair. But in get/set features, the value has to be 0's based and only account for the I/O queues, so we need to subtract 2 from the value. It's confusing all around. 
Since the admin queue pair isn't really optional I think it would be better that we introduce a new max_ioqpairs parameter that is 1's based, counts the number of pairs and obviously only accounts for the io queues. I guess we need to keep the num_queues parameter around for compatibility. The doorbells are only 4 bytes btw, but the calculation still looks wrong. With a max_ioqpairs parameter in place, the reg_size should be pow2ceil(0x1008 + 2 * (n->params.max_ioqpairs) * 4) Right? That's 0x1000 for the core registers, 8 bytes for the sq/cq doorbells for the admin queue pair, and then room for the i/o queue pairs. I added a patch for this in v6. > > - n->ns_size = bs_size / (uint64_t)n->num_namespaces; > > - > > n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); > > n->sq = g_new0(NvmeSQueue *, n->params.num_queues); > > n->cq = g_new0(NvmeCQueue *, n->params.num_queues); > > +} > > > > - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, > > - "nvme", n->reg_size); > > +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) > > +{ > > + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); > It would be nice to have #define for CMB bar number Added. > > + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); > > + > > + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); > > + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); > > + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); > > + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); > > + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); > > + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); > > + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); > > + > > + n->cmbloc = n->bar.cmbloc; > > + n->cmbsz = n->bar.cmbsz; > > + > > + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > > + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, > > + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > > + pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), > Same here although since you read it here from the controller register, > then maybe leave it as is. 
I prefer though for this kind of thing > to have a #define and use it everywhere. > Done. > > + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | > > + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); > > +} > > + > > +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) > > +{ > > + uint8_t *pci_conf = pci_dev->config; > > + > > + pci_conf[PCI_INTERRUPT_PIN] = 1; > > + pci_config_set_prog_interface(pci_conf, 0x2); > Nitpick: How about adding some #define for that as well? > (I know that this code is copied as is but still) Yeah. A PCI_PI_NVME or something would be nice. But this should probably go to some pci related header file? Any idea where that would fit? > > + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); > > + pci_config_set_device_id(pci_conf, 0x5845); > > + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); > > + pcie_endpoint_cap_init(pci_dev, 0x80); > > + > > + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", > > + n->reg_size); > > Code on split lines should start at column right after the '(' > Now its my turn to notice this - our checkpatch.pl doesn't check this, > and I can't explain how often I am getting burnt on this myself. > > There are *lot* of these issues, I pointed out some of them but you should > check all the patches for this. > I fixed all that :) > > > pci_register_bar(pci_dev, 0, > > PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, > > &n->iomem); > Split line alignment issue here as well. 
> > msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL); > > > > + if (n->params.cmb_size_mb) { > > + nvme_init_cmb(n, pci_dev); > > + } > > +} > > + > > +static void nvme_init_ctrl(NvmeCtrl *n) > > +{ > > + NvmeIdCtrl *id = &n->id_ctrl; > > + NvmeParams *params = &n->params; > > + uint8_t *pci_conf = n->parent_obj.config; > > + > > id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); > > id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); > > strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); > > strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); > > - strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); > > + strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' '); > > id->rab = 6; > > id->ieee[0] = 0x00; > > id->ieee[1] = 0x02; > > @@ -1431,46 +1471,55 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) > > > > n->bar.vs = NVME_SPEC_VER; > > n->bar.intmc = n->bar.intms = 0; > > +} > > > > - if (n->params.cmb_size_mb) { > > +static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) > > +{ > > + int64_t bs_size; > > + NvmeIdNs *id_ns = &ns->id_ns; > > > > - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); > > - NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); > > + bs_size = blk_getlength(n->conf.blk); > > + if (bs_size < 0) { > > + error_setg_errno(errp, -bs_size, "blk_getlength"); > > + return 1; > > + } > > > > - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); > > - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); > > - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); > > - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); > > - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); > > - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ > > - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); > > + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > > + n->ns_size = bs_size; > > > > - n->cmbloc = n->bar.cmbloc; > > - n->cmbsz = n->bar.cmbsz; > > + id_ns->ncap = id_ns->nuse = id_ns->nsze = > > + cpu_to_le64(nvme_ns_nlbas(n, ns)); > I myself 
don't know how to align these splits to be honest. > I would just split this into multiple statements. > > > > - n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > > - memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, > > - "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > > - pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), > > - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | > > - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); > > + return 0; > > +} > > > > +static void nvme_realize(PCIDevice *pci_dev, Error **errp) > > +{ > > + NvmeCtrl *n = NVME(pci_dev); > > + Error *local_err = NULL; > > + int i; > > + > > + if (nvme_check_constraints(n, &local_err)) { > > + error_propagate_prepend(errp, local_err, "nvme_check_constraints: "); > Do we need that hint for the end user? Removed. > > + return; > > + } > > + > > + nvme_init_state(n); > > + > > + if (nvme_init_blk(n, &local_err)) { > > + error_propagate_prepend(errp, local_err, "nvme_init_blk: "); > Same here Done. > > + return; > > } > > > > for (i = 0; i < n->num_namespaces; i++) { > > - NvmeNamespace *ns = &n->namespaces[i]; > > - NvmeIdNs *id_ns = &ns->id_ns; > > - id_ns->nsfeat = 0; > > - id_ns->nlbaf = 0; > > - id_ns->flbas = 0; > > - id_ns->mc = 0; > > - id_ns->dpc = 0; > > - id_ns->dps = 0; > > - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > > - id_ns->ncap = id_ns->nuse = id_ns->nsze = > > - cpu_to_le64(n->ns_size >> > > - id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); > > + if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) { > > + error_propagate_prepend(errp, local_err, "nvme_init_namespace: "); > And here Done. 
> > + return; > > + } > > } > > + > > + nvme_init_pci(n, pci_dev); > > + nvme_init_ctrl(n); > > } > > > > static void nvme_exit(PCIDevice *pci_dev) > > diff --git a/hw/block/nvme.h b/hw/block/nvme.h > > index 9957c4a200e2..a867bdfabafd 100644 > > --- a/hw/block/nvme.h > > +++ b/hw/block/nvme.h > > @@ -65,6 +65,22 @@ typedef struct NvmeNamespace { > > NvmeIdNs id_ns; > > } NvmeNamespace; > > > > +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns) > > +{ > Its not common to return a structure in C, usually pointer is returned to > avoid copying. In this case this doesn't matter that much though. It's actually gonna be used a lot. So swapped to pointer. > > + NvmeIdNs *id_ns = &ns->id_ns; > > + return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; > > +} > > + > > +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) > > +{ > > + return nvme_ns_lbaf(ns).ds; > > +} > > + > > +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns) > > +{ > > + return 1 << nvme_ns_lbads(ns); > > +} > > + > > #define TYPE_NVME "nvme" > > #define NVME(obj) \ > > OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) > > @@ -101,4 +117,9 @@ typedef struct NvmeCtrl { > > NvmeIdCtrl id_ctrl; > > } NvmeCtrl; > > > > +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) > > +{ > > + return n->ns_size >> nvme_ns_lbads(ns); > > +} > Unless you need all these functions in the future, this feels like > it is a bit verbose. > These will be used in various places later.
On Mon, 2020-03-16 at 00:43 -0700, Klaus Birkelund Jensen wrote: > On Feb 12 11:27, Maxim Levitsky wrote: > > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote: > > > This patch splits up nvme_realize into multiple individual functions, > > > each initializing a different subset of the device. > > > > > > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com> > > > --- > > > hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++----------------- > > > hw/block/nvme.h | 21 ++++++ > > > 2 files changed, 133 insertions(+), 63 deletions(-) > > > > > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > > > index e1810260d40b..81514eaef63a 100644 > > > --- a/hw/block/nvme.c > > > +++ b/hw/block/nvme.c > > > @@ -44,6 +44,7 @@ > > > #include "nvme.h" > > > > > > #define NVME_SPEC_VER 0x00010201 > > > +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE > > > > > > #define NVME_GUEST_ERR(trace, fmt, ...) \ > > > do { \ > > > @@ -1325,67 +1326,106 @@ static const MemoryRegionOps nvme_cmb_ops = { > > > }, > > > }; > > > > > > -static void nvme_realize(PCIDevice *pci_dev, Error **errp) > > > +static int nvme_check_constraints(NvmeCtrl *n, Error **errp) > > > { > > > - NvmeCtrl *n = NVME(pci_dev); > > > - NvmeIdCtrl *id = &n->id_ctrl; > > > - > > > - int i; > > > - int64_t bs_size; > > > - uint8_t *pci_conf; > > > - > > > - if (!n->params.num_queues) { > > > - error_setg(errp, "num_queues can't be zero"); > > > - return; > > > - } > > > + NvmeParams *params = &n->params; > > > > > > if (!n->conf.blk) { > > > - error_setg(errp, "drive property not set"); > > > - return; > > > + error_setg(errp, "nvme: block backend not configured"); > > > + return 1; > > > > As a matter of taste, negative values indicate error, and 0 is the success value. > > In Linux kernel this is even an official rule. > > > } > > Fixed. 
> > > > > > > - bs_size = blk_getlength(n->conf.blk); > > > - if (bs_size < 0) { > > > - error_setg(errp, "could not get backing file size"); > > > - return; > > > + if (!params->serial) { > > > + error_setg(errp, "nvme: serial not configured"); > > > + return 1; > > > } > > > > > > - if (!n->params.serial) { > > > - error_setg(errp, "serial property not set"); > > > - return; > > > + if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) { > > > + error_setg(errp, "nvme: invalid queue configuration"); > > > > Maybe something like "nvme: invalid queue count specified, should be between 1 and ..."? > > > + return 1; > > > } > > Fixed. Thanks > > > > + > > > + return 0; > > > +} > > > + > > > +static int nvme_init_blk(NvmeCtrl *n, Error **errp) > > > +{ > > > blkconf_blocksizes(&n->conf); > > > if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), > > > - false, errp)) { > > > - return; > > > + false, errp)) { > > > + return 1; > > > } > > > > > > - pci_conf = pci_dev->config; > > > - pci_conf[PCI_INTERRUPT_PIN] = 1; > > > - pci_config_set_prog_interface(pci_dev->config, 0x2); > > > - pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); > > > - pcie_endpoint_cap_init(pci_dev, 0x80); > > > + return 0; > > > +} > > > > > > +static void nvme_init_state(NvmeCtrl *n) > > > +{ > > > n->num_namespaces = 1; > > > n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); > > > > Isn't that wrong? > > First 4K of mmio (0x1000) is the registers, and that is followed by the doorbells, > > and each doorbell takes 8 bytes (assuming regular doorbell stride). > > so n->params.num_queues + 1 should be total number of queues, thus the 0x1004 should be 0x1000 IMHO. > > I might miss some rounding magic here though. > > > > Yeah. I think you are right. It all becomes slightly more fishy due to > the num_queues device parameter being 1's based and accounts for the > admin queue pair. 
> > But in get/set features, the value has to be 0's based and only account > for the I/O queues, so we need to subtract 2 from the value. It's > confusing all around. Yea, I can't agree more on that. The zero based values had bitten me a few times while I developed nvme-mdev as well. > > Since the admin queue pair isn't really optional I think it would be > better that we introduces a new max_ioqpairs parameter that is 1's > based, counts number of pairs and obviously only accounts for the io > queues. > > I guess we need to keep the num_queues parameter around for > compatibility. > > The doorbells are only 4 bytes btw, but the calculation still looks I don't understand that. Each doorbell is indeed 4 bytes, but they come in pairs so each doorbell pair is 8 bytes. BTW, the spec has a so-called doorbell stride, which allows one to artificially increase each doorbell by a power of two. This was intended for software implementations (like my nvme-mdev), to make sure that each doorbell takes exactly one cacheline. I personally wasn't able to notice any measurable difference, but then my nvme-mdev adds so little overhead, that it might not be measurable. You might want to support this sometime in the future to increase the feature coverage of this nvme device. > wrong. With a max_ioqpairs parameter in place, the reg_size should be > > pow2ceil(0x1008 + 2 * (n->params.max_ioqpairs) * 4) > > Right? Thats 0x1000 for the core registers, 8 bytes for the sq/cq > doorbells for the admin queue pair, and then room for the i/o queue > pairs. Looks great. BTW, > > I added a patch for this in v6. 
> > > > - n->ns_size = bs_size / (uint64_t)n->num_namespaces; > > > - > > > n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); > > > n->sq = g_new0(NvmeSQueue *, n->params.num_queues); > > > n->cq = g_new0(NvmeCQueue *, n->params.num_queues); > > > +} > > > > > > - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, > > > - "nvme", n->reg_size); > > > +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) > > > +{ > > > + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); > > > > It would be nice to have #define for CMB bar number > > Added. Thanks! > > > > + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); > > > + > > > + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); > > > + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); > > > + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); > > > + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); > > > + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); > > > + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); > > > + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); > > > + > > > + n->cmbloc = n->bar.cmbloc; > > > + n->cmbsz = n->bar.cmbsz; > > > + > > > + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > > > + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, > > > + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > > > + pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), > > > > Same here although since you read it here from the controller register, > > then maybe leave it as is. I prefer though for this kind of thing > > to have a #define and use it everywhere. > > > > Done. > > > > + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | > > > + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); > > > +} > > > + > > > +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) > > > +{ > > > + uint8_t *pci_conf = pci_dev->config; > > > + > > > + pci_conf[PCI_INTERRUPT_PIN] = 1; > > > + pci_config_set_prog_interface(pci_conf, 0x2); > > > > Nitpick: How about adding some #define for that as well? 
> > (I know that this code is copied as is but still) > > Yeah. A PCI_PI_NVME or something would be nice. But this should probably > go to some pci related header file? Any idea where that would fit? in include/hw/pci/pci_ids.h maybe? > > > > + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); > > > + pci_config_set_device_id(pci_conf, 0x5845); > > > + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); > > > + pcie_endpoint_cap_init(pci_dev, 0x80); > > > + > > > + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", > > > + n->reg_size); > > > > Code on split lines should start at column right after the '(' > > Now its my turn to notice this - our checkpatch.pl doesn't check this, > > and I can't explain how often I am getting burnt on this myself. > > > > There are *lot* of these issues, I pointed out some of them but you should > > check all the patches for this. > > > > I fixed all that :) Thanks, but I bet that some of this remained - taking from my experience, since I also like you wasn't used to this rule, so I didn't yet adopt that rule subconsciously, and our checkpatch.pl doesn't check for it, so I keep on violating this rule in most patches I send despite me checking each patch for few times. I'll go over V6, and if I spot this I'll take a note, now that you fixed most of this issues. Thanks again. > > > > > > pci_register_bar(pci_dev, 0, > > > PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, > > > &n->iomem); > > > > Split line alignment issue here as well. 
> > > msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL); > > > > > > + if (n->params.cmb_size_mb) { > > > + nvme_init_cmb(n, pci_dev); > > > + } > > > +} > > > + > > > +static void nvme_init_ctrl(NvmeCtrl *n) > > > +{ > > > + NvmeIdCtrl *id = &n->id_ctrl; > > > + NvmeParams *params = &n->params; > > > + uint8_t *pci_conf = n->parent_obj.config; > > > + > > > id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); > > > id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); > > > strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); > > > strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); > > > - strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); > > > + strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' '); > > > id->rab = 6; > > > id->ieee[0] = 0x00; > > > id->ieee[1] = 0x02; > > > @@ -1431,46 +1471,55 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) > > > > > > n->bar.vs = NVME_SPEC_VER; > > > n->bar.intmc = n->bar.intms = 0; > > > +} > > > > > > - if (n->params.cmb_size_mb) { > > > +static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) > > > +{ > > > + int64_t bs_size; > > > + NvmeIdNs *id_ns = &ns->id_ns; > > > > > > - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); > > > - NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); > > > + bs_size = blk_getlength(n->conf.blk); > > > + if (bs_size < 0) { > > > + error_setg_errno(errp, -bs_size, "blk_getlength"); > > > + return 1; > > > + } > > > > > > - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); > > > - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); > > > - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); > > > - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); > > > - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); > > > - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ > > > - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); > > > + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > > > + n->ns_size = bs_size; > > > > > > - n->cmbloc = n->bar.cmbloc; > > > - n->cmbsz = 
n->bar.cmbsz; > > > + id_ns->ncap = id_ns->nuse = id_ns->nsze = > > > + cpu_to_le64(nvme_ns_nlbas(n, ns)); > > > > I myself don't know how to align these splits to be honest. > > I would just split this into multiple statements. > > > > > > - n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > > > - memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, > > > - "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); > > > - pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), > > > - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | > > > - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); > > > + return 0; > > > +} > > > > > > +static void nvme_realize(PCIDevice *pci_dev, Error **errp) > > > +{ > > > + NvmeCtrl *n = NVME(pci_dev); > > > + Error *local_err = NULL; > > > + int i; > > > + > > > + if (nvme_check_constraints(n, &local_err)) { > > > + error_propagate_prepend(errp, local_err, "nvme_check_constraints: "); > > > > Do we need that hint for the end user? > > Removed. > > > > + return; > > > + } > > > + > > > + nvme_init_state(n); > > > + > > > + if (nvme_init_blk(n, &local_err)) { > > > + error_propagate_prepend(errp, local_err, "nvme_init_blk: "); > > > > Same here > > Done. > > > > > + return; > > > } > > > > > > for (i = 0; i < n->num_namespaces; i++) { > > > - NvmeNamespace *ns = &n->namespaces[i]; > > > - NvmeIdNs *id_ns = &ns->id_ns; > > > - id_ns->nsfeat = 0; > > > - id_ns->nlbaf = 0; > > > - id_ns->flbas = 0; > > > - id_ns->mc = 0; > > > - id_ns->dpc = 0; > > > - id_ns->dps = 0; > > > - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > > > - id_ns->ncap = id_ns->nuse = id_ns->nsze = > > > - cpu_to_le64(n->ns_size >> > > > - id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); > > > + if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) { > > > + error_propagate_prepend(errp, local_err, "nvme_init_namespace: "); > > > > And here > > Done. 
> > > > > + return; > > > + } > > > } > > > + > > > + nvme_init_pci(n, pci_dev); > > > + nvme_init_ctrl(n); > > > } > > > > > > static void nvme_exit(PCIDevice *pci_dev) > > > diff --git a/hw/block/nvme.h b/hw/block/nvme.h > > > index 9957c4a200e2..a867bdfabafd 100644 > > > --- a/hw/block/nvme.h > > > +++ b/hw/block/nvme.h > > > @@ -65,6 +65,22 @@ typedef struct NvmeNamespace { > > > NvmeIdNs id_ns; > > > } NvmeNamespace; > > > > > > +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns) > > > +{ > > > > It's not common to return a structure in C, usually a pointer is returned to > > avoid copying. In this case this doesn't matter that much though. > > It's actually gonna be used a lot. So swapped to pointer. Thanks. > > > > + NvmeIdNs *id_ns = &ns->id_ns; > > > + return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; > > > +} > > > + > > > +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) > > > +{ > > > + return nvme_ns_lbaf(ns).ds; > > > +} > > > + > > > +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns) > > > +{ > > > + return 1 << nvme_ns_lbads(ns); > > > +} > > > + > > > #define TYPE_NVME "nvme" > > > #define NVME(obj) \ > > > OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) > > > @@ -101,4 +117,9 @@ typedef struct NvmeCtrl { > > > NvmeIdCtrl id_ctrl; > > > } NvmeCtrl; > > > > > > +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) > > > +{ > > > + return n->ns_size >> nvme_ns_lbads(ns); > > > +} > > > > Unless you need all these functions in the future, this feels like > > it is a bit verbose. > > > > These will be used in various places later. OK, then it is all right. > > Best regards, Maxim Levitsky
diff --git a/hw/block/nvme.c b/hw/block/nvme.c index e1810260d40b..81514eaef63a 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -44,6 +44,7 @@ #include "nvme.h" #define NVME_SPEC_VER 0x00010201 +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -1325,67 +1326,106 @@ static const MemoryRegionOps nvme_cmb_ops = { }, }; -static void nvme_realize(PCIDevice *pci_dev, Error **errp) +static int nvme_check_constraints(NvmeCtrl *n, Error **errp) { - NvmeCtrl *n = NVME(pci_dev); - NvmeIdCtrl *id = &n->id_ctrl; - - int i; - int64_t bs_size; - uint8_t *pci_conf; - - if (!n->params.num_queues) { - error_setg(errp, "num_queues can't be zero"); - return; - } + NvmeParams *params = &n->params; if (!n->conf.blk) { - error_setg(errp, "drive property not set"); - return; + error_setg(errp, "nvme: block backend not configured"); + return 1; } - bs_size = blk_getlength(n->conf.blk); - if (bs_size < 0) { - error_setg(errp, "could not get backing file size"); - return; + if (!params->serial) { + error_setg(errp, "nvme: serial not configured"); + return 1; } - if (!n->params.serial) { - error_setg(errp, "serial property not set"); - return; + if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) { + error_setg(errp, "nvme: invalid queue configuration"); + return 1; } + + return 0; +} + +static int nvme_init_blk(NvmeCtrl *n, Error **errp) +{ blkconf_blocksizes(&n->conf); if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), - false, errp)) { - return; + false, errp)) { + return 1; } - pci_conf = pci_dev->config; - pci_conf[PCI_INTERRUPT_PIN] = 1; - pci_config_set_prog_interface(pci_dev->config, 0x2); - pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); - pcie_endpoint_cap_init(pci_dev, 0x80); + return 0; +} +static void nvme_init_state(NvmeCtrl *n) +{ n->num_namespaces = 1; n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); - n->ns_size = bs_size / 
(uint64_t)n->num_namespaces; - n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); n->cq = g_new0(NvmeCQueue *, n->params.num_queues); +} - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, - "nvme", n->reg_size); +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) +{ + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); + + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); + + n->cmbloc = n->bar.cmbloc; + n->cmbsz = n->bar.cmbsz; + + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); +} + +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) +{ + uint8_t *pci_conf = pci_dev->config; + + pci_conf[PCI_INTERRUPT_PIN] = 1; + pci_config_set_prog_interface(pci_conf, 0x2); + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); + pci_config_set_device_id(pci_conf, 0x5845); + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); + pcie_endpoint_cap_init(pci_dev, 0x80); + + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + n->reg_size); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL); + if (n->params.cmb_size_mb) { + nvme_init_cmb(n, pci_dev); + } +} + +static void nvme_init_ctrl(NvmeCtrl *n) +{ + NvmeIdCtrl *id = &n->id_ctrl; + NvmeParams *params = &n->params; + uint8_t *pci_conf = 
n->parent_obj.config; + id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); - strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); + strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' '); id->rab = 6; id->ieee[0] = 0x00; id->ieee[1] = 0x02; @@ -1431,46 +1471,55 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.vs = NVME_SPEC_VER; n->bar.intmc = n->bar.intms = 0; +} - if (n->params.cmb_size_mb) { +static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +{ + int64_t bs_size; + NvmeIdNs *id_ns = &ns->id_ns; - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); - NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); + bs_size = blk_getlength(n->conf.blk); + if (bs_size < 0) { + error_setg_errno(errp, -bs_size, "blk_getlength"); + return 1; + } - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; + n->ns_size = bs_size; - n->cmbloc = n->bar.cmbloc; - n->cmbsz = n->bar.cmbsz; + id_ns->ncap = id_ns->nuse = id_ns->nsze = + cpu_to_le64(nvme_ns_nlbas(n, ns)); - n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); - memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, - "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); - pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); + return 0; +} +static void nvme_realize(PCIDevice *pci_dev, Error **errp) +{ + NvmeCtrl *n = NVME(pci_dev); + Error *local_err = NULL; + int i; + + if 
(nvme_check_constraints(n, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_check_constraints: "); + return; + } + + nvme_init_state(n); + + if (nvme_init_blk(n, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_init_blk: "); + return; } for (i = 0; i < n->num_namespaces; i++) { - NvmeNamespace *ns = &n->namespaces[i]; - NvmeIdNs *id_ns = &ns->id_ns; - id_ns->nsfeat = 0; - id_ns->nlbaf = 0; - id_ns->flbas = 0; - id_ns->mc = 0; - id_ns->dpc = 0; - id_ns->dps = 0; - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; - id_ns->ncap = id_ns->nuse = id_ns->nsze = - cpu_to_le64(n->ns_size >> - id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); + if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_init_namespace: "); + return; + } } + + nvme_init_pci(n, pci_dev); + nvme_init_ctrl(n); } static void nvme_exit(PCIDevice *pci_dev) diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 9957c4a200e2..a867bdfabafd 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -65,6 +65,22 @@ typedef struct NvmeNamespace { NvmeIdNs id_ns; } NvmeNamespace; +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns) +{ + NvmeIdNs *id_ns = &ns->id_ns; + return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; +} + +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) +{ + return nvme_ns_lbaf(ns).ds; +} + +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns) +{ + return 1 << nvme_ns_lbads(ns); +} + #define TYPE_NVME "nvme" #define NVME(obj) \ OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) @@ -101,4 +117,9 @@ typedef struct NvmeCtrl { NvmeIdCtrl id_ctrl; } NvmeCtrl; +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) +{ + return n->ns_size >> nvme_ns_lbads(ns); +} + #endif /* HW_NVME_H */
This patch splits up nvme_realize into multiple individual functions, each initializing a different subset of the device. Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com> --- hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++----------------- hw/block/nvme.h | 21 ++++++ 2 files changed, 133 insertions(+), 63 deletions(-)